1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/ 3 #include <petscksp.h> 4 5 #define CG_GradientDescent 0 6 #define CG_HestenesStiefel 1 7 #define CG_FletcherReeves 2 8 #define CG_PolakRibierePolyak 3 9 #define CG_PolakRibierePlus 4 10 #define CG_DaiYuan 5 11 #define CG_HagerZhang 6 12 #define CG_DaiKou 7 13 #define CG_KouDai 8 14 #define CG_SSML_BFGS 9 15 #define CG_SSML_DFP 10 16 #define CG_SSML_BROYDEN 11 17 #define CG_PCGradientDescent 12 18 #define CGTypes 13 19 20 static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"}; 21 22 #define CG_AS_NONE 0 23 #define CG_AS_BERTSEKAS 1 24 #define CG_AS_SIZE 2 25 26 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 27 28 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) 29 { 30 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 31 32 PetscFunctionBegin; 33 PetscCall(ISDestroy(&cg->inactive_old)); 34 if (cg->inactive_idx) { 35 PetscCall(ISDuplicate(cg->inactive_idx, &cg->inactive_old)); 36 PetscCall(ISCopy(cg->inactive_idx, cg->inactive_old)); 37 } 38 switch (asType) { 39 case CG_AS_NONE: 40 PetscCall(ISDestroy(&cg->inactive_idx)); 41 PetscCall(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx)); 42 PetscCall(ISDestroy(&cg->active_idx)); 43 PetscCall(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx)); 44 break; 45 46 case CG_AS_BERTSEKAS: 47 /* Use gradient descent to estimate the active set */ 48 PetscCall(VecCopy(cg->unprojected_gradient, cg->W)); 49 PetscCall(VecScale(cg->W, -1.0)); 50 PetscCall(TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, 51 &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx)); 52 break; 53 default: 54 break; 55 } 56 PetscFunctionReturn(0); 57 } 58 59 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step) 60 { 61 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 62 63 PetscFunctionBegin; 64 switch (asType) { 65 case CG_AS_NONE: 66 PetscCall(VecISSet(step, cg->active_idx, 0.0)); 67 break; 68 69 case CG_AS_BERTSEKAS: 70 PetscCall(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step)); 71 break; 72 73 default: 74 break; 75 } 76 PetscFunctionReturn(0); 77 } 78 79 static PetscErrorCode TaoSolve_BNCG(Tao tao) 80 { 81 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 82 PetscReal step=1.0,gnorm,gnorm2, resnorm; 83 PetscInt nDiff; 84 85 PetscFunctionBegin; 86 /* Project the current point onto the feasible set */ 87 PetscCall(TaoComputeVariableBounds(tao)); 88 PetscCall(TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU)); 89 90 /* Project the initial point onto the feasible region */ 91 PetscCall(TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution)); 92 93 if (nDiff > 0 || !tao->recycle) { 94 PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); 95 } 96 PetscCall(VecNorm(cg->unprojected_gradient,NORM_2,&gnorm)); 97 PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 98 99 /* Estimate the active set and compute the projected gradient */ 100 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 101 102 /* Project the gradient and calculate the norm */ 103 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 104 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 105 PetscCall(VecNorm(tao->gradient,NORM_2,&gnorm)); 106 gnorm2 = gnorm*gnorm; 107 108 /* Initialize counters */ 109 tao->niter = 0; 110 cg->ls_fails = cg->descent_error = 0; 111 cg->resets = -1; 112 cg->skipped_updates = cg->pure_gd_steps = 0; 113 cg->iter_quad = 0; 114 115 /* Convergence test at the starting point. */ 116 tao->reason = TAO_CONTINUE_ITERATING; 117 118 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 119 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 120 PetscCheck(!PetscIsInfOrNanReal(resnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 121 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 122 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 123 PetscCall((*tao->ops->convergencetest)(tao,tao->cnvP)); 124 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 125 /* Calculate initial direction. */ 126 if (!tao->recycle) { 127 /* We are not recycling a solution/history from a past TaoSolve */ 128 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 129 } 130 /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */ 131 while (1) { 132 /* Call general purpose update function */ 133 if (tao->ops->update) { 134 PetscCall((*tao->ops->update)(tao, tao->niter, tao->user_update)); 135 } 136 PetscCall(TaoBNCGConductIteration(tao, gnorm)); 137 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 138 } 139 } 140 141 static PetscErrorCode TaoSetUp_BNCG(Tao tao) 142 { 143 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 144 145 PetscFunctionBegin; 146 if (!tao->gradient) { 147 PetscCall(VecDuplicate(tao->solution,&tao->gradient)); 148 } 149 if (!tao->stepdirection) { 150 PetscCall(VecDuplicate(tao->solution,&tao->stepdirection)); 151 } 152 if (!cg->W) { 153 PetscCall(VecDuplicate(tao->solution,&cg->W)); 154 } 155 if (!cg->work) { 156 PetscCall(VecDuplicate(tao->solution,&cg->work)); 157 } 158 if (!cg->sk) { 159 PetscCall(VecDuplicate(tao->solution,&cg->sk)); 160 } 161 if (!cg->yk) { 162 PetscCall(VecDuplicate(tao->gradient,&cg->yk)); 163 } 164 if (!cg->X_old) { 165 PetscCall(VecDuplicate(tao->solution,&cg->X_old)); 166 } 167 if (!cg->G_old) { 168 PetscCall(VecDuplicate(tao->gradient,&cg->G_old)); 169 } 170 if (cg->diag_scaling) { 171 PetscCall(VecDuplicate(tao->solution,&cg->d_work)); 172 PetscCall(VecDuplicate(tao->solution,&cg->y_work)); 173 PetscCall(VecDuplicate(tao->solution,&cg->g_work)); 174 } 175 if (!cg->unprojected_gradient) { 176 PetscCall(VecDuplicate(tao->gradient,&cg->unprojected_gradient)); 177 } 178 if (!cg->unprojected_gradient_old) { 179 PetscCall(VecDuplicate(tao->gradient,&cg->unprojected_gradient_old)); 180 } 181 PetscCall(MatLMVMAllocate(cg->B, cg->sk, cg->yk)); 182 if (cg->pc) { 183 PetscCall(MatLMVMSetJ0(cg->B, cg->pc)); 184 } 185 PetscFunctionReturn(0); 186 } 187 188 static PetscErrorCode TaoDestroy_BNCG(Tao tao) 189 { 190 TAO_BNCG *cg = (TAO_BNCG*) tao->data; 191 192 PetscFunctionBegin; 193 if (tao->setupcalled) { 194 PetscCall(VecDestroy(&cg->W)); 195 PetscCall(VecDestroy(&cg->work)); 196 PetscCall(VecDestroy(&cg->X_old)); 197 PetscCall(VecDestroy(&cg->G_old)); 198 PetscCall(VecDestroy(&cg->unprojected_gradient)); 199 PetscCall(VecDestroy(&cg->unprojected_gradient_old)); 200 PetscCall(VecDestroy(&cg->g_work)); 201 PetscCall(VecDestroy(&cg->d_work)); 202 PetscCall(VecDestroy(&cg->y_work)); 203 PetscCall(VecDestroy(&cg->sk)); 204 PetscCall(VecDestroy(&cg->yk)); 205 } 206 PetscCall(ISDestroy(&cg->active_lower)); 207 PetscCall(ISDestroy(&cg->active_upper)); 208 PetscCall(ISDestroy(&cg->active_fixed)); 209 PetscCall(ISDestroy(&cg->active_idx)); 210 PetscCall(ISDestroy(&cg->inactive_idx)); 211 PetscCall(ISDestroy(&cg->inactive_old)); 212 PetscCall(ISDestroy(&cg->new_inactives)); 213 PetscCall(MatDestroy(&cg->B)); 214 if (cg->pc) { 215 PetscCall(MatDestroy(&cg->pc)); 216 } 217 PetscCall(PetscFree(tao->data)); 218 PetscFunctionReturn(0); 219 } 220 221 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao) 222 { 223 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 224 225 PetscFunctionBegin; 226 PetscOptionsHeadBegin(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization"); 227 PetscCall(PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL)); 228 if (cg->cg_type != CG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */ 229 if (CG_GradientDescent == cg->cg_type) { 230 cg->cg_type = CG_PCGradientDescent; 231 /* Set scaling equal to none or, at best, scalar scaling. */ 232 cg->unscaled_restart = PETSC_TRUE; 233 cg->diag_scaling = PETSC_FALSE; 234 } 235 PetscCall(PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL)); 236 PetscCall(PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL)); 237 PetscCall(PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL)); 238 PetscCall(PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL)); 239 PetscCall(PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL)); 240 PetscCall(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL)); 241 PetscCall(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL)); 242 PetscCall(PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL)); 243 PetscCall(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL)); 244 PetscCall(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL)); 245 PetscCall(PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL)); 246 PetscCall(PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL)); 247 PetscCall(PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL)); 248 PetscCall(PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL)); 249 PetscCall(PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL)); 250 PetscCall(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL)); 251 PetscCall(PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL)); 252 PetscCall(PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL)); 253 if (cg->no_scaling) { 254 cg->diag_scaling = PETSC_FALSE; 255 cg->alpha = -1.0; 256 } 257 if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling) { /* Some more default options that appear to be good. */ 258 cg->neg_xi = PETSC_TRUE; 259 } 260 PetscCall(PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL)); 261 PetscCall(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL)); 262 PetscCall(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL)); 263 PetscCall(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL)); 264 PetscCall(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL)); 265 266 PetscOptionsHeadEnd(); 267 PetscCall(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix)); 268 PetscCall(MatAppendOptionsPrefix(cg->B, "tao_bncg_")); 269 PetscCall(MatSetFromOptions(cg->B)); 270 PetscFunctionReturn(0); 271 } 272 273 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) 274 { 275 PetscBool isascii; 276 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 277 278 PetscFunctionBegin; 279 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 280 if (isascii) { 281 PetscCall(PetscViewerASCIIPushTab(viewer)); 282 PetscCall(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type])); 283 PetscCall(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %" PetscInt_FMT "\n", cg->skipped_updates)); 284 PetscCall(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %" PetscInt_FMT "\n", cg->resets)); 285 PetscCall(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %" PetscInt_FMT "\n", cg->pure_gd_steps)); 286 PetscCall(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %" PetscInt_FMT "\n", cg->descent_error)); 287 PetscCall(PetscViewerASCIIPrintf(viewer, "Line search fails: %" PetscInt_FMT "\n", cg->ls_fails)); 288 if (cg->diag_scaling) { 289 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); 290 if (isascii) { 291 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 292 PetscCall(MatView(cg->B, viewer)); 293 PetscCall(PetscViewerPopFormat(viewer)); 294 } 295 } 296 PetscCall(PetscViewerASCIIPopTab(viewer)); 297 } 298 PetscFunctionReturn(0); 299 } 300 301 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha) 302 { 303 PetscReal a, b, c, sig1, sig2; 304 305 PetscFunctionBegin; 306 *scale = 0.0; 307 if (1.0 == alpha) *scale = yts/yty; 308 else if (0.0 == alpha) *scale = sts/yts; 309 else if (-1.0 == alpha) *scale = 1.0; 310 else { 311 a = yty; 312 b = yts; 313 c = sts; 314 a *= alpha; 315 b *= -(2.0*alpha - 1.0); 316 c *= alpha - 1.0; 317 sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 318 sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 319 /* accept the positive root as the scalar */ 320 if (sig1 > 0.0) *scale = sig1; 321 else if (sig2 > 0.0) *scale = sig2; 322 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar"); 323 } 324 PetscFunctionReturn(0); 325 } 326 327 /*MC 328 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 329 330 Options Database Keys: 331 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled) 332 . -tao_bncg_eta <r> - restart tolerance 333 . -tao_bncg_type <taocg_type> - cg formula 334 . -tao_bncg_as_type <none,bertsekas> - active set estimation method 335 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation 336 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation 337 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision. 338 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method 339 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 340 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 341 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method 342 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method 343 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method 344 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method 345 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True. 346 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods 347 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts 348 . -tao_bncg_zeta <r> - Scaling parameter in the KD method 349 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps 350 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps 351 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart 352 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem 353 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations 354 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults. 355 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions 356 357 Notes: 358 CG formulas are: 359 + "gd" - Gradient Descent 360 . "fr" - Fletcher-Reeves 361 . "pr" - Polak-Ribiere-Polyak 362 . "prp" - Polak-Ribiere-Plus 363 . "hs" - Hestenes-Steifel 364 . "dy" - Dai-Yuan 365 . "ssml_bfgs" - Self-Scaling Memoryless BFGS 366 . "ssml_dfp" - Self-Scaling Memoryless DFP 367 . "ssml_brdn" - Self-Scaling Memoryless Broyden 368 . "hz" - Hager-Zhang (CG_DESCENT 5.3) 369 . "dk" - Dai-Kou (2013) 370 - "kd" - Kou-Dai (2015) 371 372 Level: beginner 373 M*/ 374 375 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 376 { 377 TAO_BNCG *cg; 378 const char *morethuente_type = TAOLINESEARCHMT; 379 380 PetscFunctionBegin; 381 tao->ops->setup = TaoSetUp_BNCG; 382 tao->ops->solve = TaoSolve_BNCG; 383 tao->ops->view = TaoView_BNCG; 384 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 385 tao->ops->destroy = TaoDestroy_BNCG; 386 387 /* Override default settings (unless already changed) */ 388 if (!tao->max_it_changed) tao->max_it = 2000; 389 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 390 391 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 392 /* method. In particular, gtol should be less that 0.5; the value used in */ 393 /* Nocedal and Wright is 0.10. We use the default values for the */ 394 /* linesearch because it seems to work better. */ 395 PetscCall(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch)); 396 PetscCall(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1)); 397 PetscCall(TaoLineSearchSetType(tao->linesearch, morethuente_type)); 398 PetscCall(TaoLineSearchUseTaoRoutines(tao->linesearch, tao)); 399 400 PetscCall(PetscNewLog(tao,&cg)); 401 tao->data = (void*)cg; 402 PetscCall(KSPInitializePackage()); 403 PetscCall(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B)); 404 PetscCall(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1)); 405 PetscCall(MatSetType(cg->B, MATLMVMDIAGBROYDEN)); 406 407 cg->pc = NULL; 408 409 cg->dk_eta = 0.5; 410 cg->hz_eta = 0.4; 411 cg->dynamic_restart = PETSC_FALSE; 412 cg->unscaled_restart = PETSC_FALSE; 413 cg->no_scaling = PETSC_FALSE; 414 cg->delta_min = 1e-7; 415 cg->delta_max = 100; 416 cg->theta = 1.0; 417 cg->hz_theta = 1.0; 418 cg->dfp_scale = 1.0; 419 cg->bfgs_scale = 1.0; 420 cg->zeta = 0.1; 421 cg->min_quad = 6; 422 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 423 cg->xi = 1.0; 424 cg->neg_xi = PETSC_TRUE; 425 cg->spaced_restart = PETSC_FALSE; 426 cg->tol_quad = 1e-8; 427 cg->as_step = 0.001; 428 cg->as_tol = 0.001; 429 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/ 430 cg->as_type = CG_AS_BERTSEKAS; 431 cg->cg_type = CG_SSML_BFGS; 432 cg->alpha = 1.0; 433 cg->diag_scaling = PETSC_TRUE; 434 PetscFunctionReturn(0); 435 } 436 437 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) 438 { 439 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 440 PetscReal scaling; 441 442 PetscFunctionBegin; 443 ++cg->resets; 444 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 445 scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling)); 446 if (cg->unscaled_restart) { 447 scaling = 1.0; 448 ++cg->pure_gd_steps; 449 } 450 PetscCall(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient)); 451 /* Also want to reset our diagonal scaling with each restart */ 452 if (cg->diag_scaling) { 453 PetscCall(MatLMVMReset(cg->B, PETSC_FALSE)); 454 } 455 PetscFunctionReturn(0); 456 } 457 458 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) 459 { 460 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 461 PetscReal quadinterp; 462 463 PetscFunctionBegin; 464 if (cg->f < cg->min_quad/10) { 465 *dynrestart = PETSC_FALSE; 466 PetscFunctionReturn(0); 467 } /* just skip this since this strategy doesn't work well for functions near zero */ 468 quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old)); 469 if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad; 470 else { 471 cg->iter_quad = 0; 472 *dynrestart = PETSC_FALSE; 473 } 474 if (cg->iter_quad >= cg->min_quad) { 475 cg->iter_quad = 0; 476 *dynrestart = PETSC_TRUE; 477 } 478 PetscFunctionReturn(0); 479 } 480 481 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback) 482 { 483 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 484 PetscReal gamma = 1.0, tau_k, beta; 485 PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk=1.0, gd; 486 PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg; 487 PetscInt dim; 488 PetscBool cg_restart = PETSC_FALSE; 489 PetscFunctionBegin; 490 491 /* Local curvature check to see if we need to restart */ 492 if (tao->niter >= 1 || tao->recycle) { 493 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 494 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 495 ynorm2 = ynorm*ynorm; 496 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 497 if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON) { 498 cg_restart = PETSC_TRUE; 499 ++cg->skipped_updates; 500 } 501 if (cg->spaced_restart) { 502 PetscCall(VecGetSize(tao->gradient, &dim)); 503 if (tao->niter % (dim*cg->min_restart_num)) cg_restart = PETSC_TRUE; 504 } 505 } 506 /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */ 507 if (cg->spaced_restart) { 508 PetscCall(VecGetSize(tao->gradient, &dim)); 509 if (0 == tao->niter % (6*dim)) cg_restart = PETSC_TRUE; 510 } 511 /* Compute the diagonal scaling vector if applicable */ 512 if (cg->diag_scaling) { 513 PetscCall(MatLMVMUpdate(cg->B, tao->solution, tao->gradient)); 514 } 515 516 /* A note on diagonal scaling (to be added to paper): 517 For the FR, PR, PRP, and DY methods, the diagonally scaled versions 518 must be derived as a preconditioned CG method rather than as 519 a Hessian initialization like in the Broyden methods. */ 520 521 /* In that case, one writes the objective function as 522 f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k). 523 Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to 524 HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the 525 same under preconditioning. Note that A is diagonal, such that A^T = A. */ 526 527 /* This yields questions like what the dot product d_k^T y_k 528 should look like. HZ mistakenly treats that as the same under 529 preconditioning, but that is not necessarily true. */ 530 531 /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation, 532 we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}), 533 yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is 534 NOT the same if our preconditioning matrix is updated between iterations. 535 This same issue is found when considering dot products of the form g_{k+1}^T y_k. */ 536 537 /* Compute CG step direction */ 538 if (cg_restart) { 539 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 540 } else if (pcgd_fallback) { 541 /* Just like preconditioned CG */ 542 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 543 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 544 } else if (ynorm2 > PETSC_MACHINE_EPSILON) { 545 switch (cg->cg_type) { 546 case CG_PCGradientDescent: 547 if (!cg->diag_scaling) { 548 if (!cg->no_scaling) { 549 cg->sts = step*step*dnorm*dnorm; 550 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha)); 551 } else { 552 tau_k = 1.0; 553 ++cg->pure_gd_steps; 554 } 555 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient)); 556 } else { 557 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 558 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 559 } 560 break; 561 562 case CG_HestenesStiefel: 563 /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */ 564 if (!cg->diag_scaling) { 565 cg->sts = step*step*dnorm*dnorm; 566 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 567 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha)); 568 beta = tau_k*gkp1_yk/dk_yk; 569 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 570 } else { 571 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 572 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 573 beta = gkp1_yk/dk_yk; 574 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 575 } 576 break; 577 578 case CG_FletcherReeves: 579 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 580 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 581 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 582 ynorm2 = ynorm*ynorm; 583 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 584 if (!cg->diag_scaling) { 585 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha)); 586 beta = tau_k*gnorm2/gnorm2_old; 587 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 588 } else { 589 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */ 590 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 591 PetscCall(VecDot(tao->gradient, cg->g_work, &tmp)); 592 beta = tmp/gnorm2_old; 593 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 594 } 595 break; 596 597 case CG_PolakRibierePolyak: 598 snorm = step*dnorm; 599 if (!cg->diag_scaling) { 600 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 601 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 602 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha)); 603 beta = tau_k*gkp1_yk/gnorm2_old; 604 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 605 } else { 606 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); 607 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 608 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 609 beta = gkp1_yk/gnorm2_old; 610 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 611 } 612 break; 613 614 case CG_PolakRibierePlus: 615 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 616 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 617 ynorm2 = ynorm*ynorm; 618 if (!cg->diag_scaling) { 619 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 620 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 621 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha)); 622 beta = tau_k*gkp1_yk/gnorm2_old; 623 beta = PetscMax(beta, 0.0); 624 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 625 } else { 626 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */ 627 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 628 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 629 beta = gkp1_yk/gnorm2_old; 630 beta = PetscMax(beta, 0.0); 631 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 632 } 633 break; 634 635 case CG_DaiYuan: 636 /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property." 637 SIAM Journal on optimization 10, no. 1 (1999): 177-182. */ 638 if (!cg->diag_scaling) { 639 PetscCall(VecDot(tao->stepdirection, tao->gradient, &gd)); 640 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 641 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha)); 642 beta = tau_k*gnorm2/(gd - gd_old); 643 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 644 } else { 645 PetscCall(MatMult(cg->B, tao->stepdirection, cg->d_work)); 646 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 647 PetscCall(VecDot(cg->g_work, tao->gradient, >Dg)); 648 PetscCall(VecDot(tao->stepdirection, cg->G_old, &gd_old)); 649 PetscCall(VecDot(cg->d_work, cg->g_work, &dk_yk)); 650 dk_yk = dk_yk - gd_old; 651 beta = gtDg/dk_yk; 652 PetscCall(VecScale(cg->d_work, beta)); 653 PetscCall(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work)); 654 } 655 break; 656 657 case CG_HagerZhang: 658 /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent." 659 ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */ 660 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 661 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 662 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 663 snorm = dnorm*step; 664 cg->yts = step*dk_yk; 665 if (cg->use_dynamic_restart) { 666 PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 667 } 668 if (cg->dynamic_restart) { 669 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 670 } else { 671 if (!cg->diag_scaling) { 672 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 673 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 674 /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */ 675 tmp = gd/dk_yk; 676 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)); 677 /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */ 678 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 679 /* d <- -t*g + beta*t*d */ 680 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 681 } else { 682 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 683 cg->yty = ynorm2; 684 cg->sts = snorm*snorm; 685 /* Apply the diagonal scaling to all my vectors */ 686 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 687 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 688 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 689 /* Construct the constant ytDgkp1 */ 690 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 691 /* Construct the constant for scaling Dkyk in the update */ 692 tmp = gd/dk_yk; 693 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 694 tau_k = -tau_k*gd/(dk_yk*dk_yk); 695 /* beta is the constant which adds the dk contribution */ 696 beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */ 697 /* From HZ2013, modified to account for diagonal scaling*/ 698 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 699 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 700 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 701 /* Do the update */ 702 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 703 } 704 } 705 break; 706 707 case CG_DaiKou: 708 /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search." 709 SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */ 710 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 711 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 712 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 713 snorm = step*dnorm; 714 cg->yts = dk_yk*step; 715 if (!cg->diag_scaling) { 716 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 717 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 718 /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */ 719 tmp = gd/dk_yk; 720 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk; 721 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 722 /* d <- -t*g + beta*t*d */ 723 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk)); 724 } else { 725 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 726 cg->yty = ynorm2; 727 cg->sts = snorm*snorm; 728 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 729 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 730 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 731 /* Construct the constant ytDgkp1 */ 732 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 733 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 734 tau_k = tau_k*gd/(dk_yk*dk_yk); 735 tmp = gd/dk_yk; 736 /* beta is the constant which adds the dk contribution */ 737 beta = gkp1_yk/dk_yk - step*tmp - tau_k; 738 /* Update this for the last term in beta */ 739 PetscCall(VecDot(cg->y_work, tao->stepdirection, &dk_yk)); 740 beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */ 741 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 742 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 743 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 744 /* Do the update */ 745 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 746 } 747 break; 748 749 case CG_KouDai: 750 /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization." 751 Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */ 752 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 753 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 754 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 755 snorm = step*dnorm; 756 cg->yts = dk_yk*step; 757 if (cg->use_dynamic_restart) { 758 PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 759 } 760 if (cg->dynamic_restart) { 761 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 762 } else { 763 if (!cg->diag_scaling) { 764 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 765 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 766 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 767 if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) /* 0.1 is KD's zeta parameter */ 768 { 769 beta = cg->zeta*tau_k*gd/(dnorm*dnorm); 770 gamma = 0.0; 771 } else { 772 if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk; 773 /* This seems to be very effective when there's no tau_k scaling. 774 This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */ 775 else { 776 gamma = cg->xi*gd/dk_yk; 777 } 778 } 779 /* d <- -t*g + beta*t*d + t*tmp*yk */ 780 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk)); 781 } else { 782 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 783 cg->yty = ynorm2; 784 cg->sts = snorm*snorm; 785 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 786 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 787 /* Construct the constant ytDgkp1 */ 788 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1D_yk)); 789 /* Construct the constant for scaling Dkyk in the update */ 790 gamma = gd/dk_yk; 791 /* tau_k = -ytDy/(ytd)^2 * gd */ 792 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 793 tau_k = tau_k*gd/(dk_yk*dk_yk); 794 /* beta is the constant which adds the d_k contribution */ 795 beta = gkp1D_yk/dk_yk - step*gamma - tau_k; 796 /* Here is the requisite check */ 797 PetscCall(VecDot(tao->stepdirection, cg->g_work, &tmp)); 798 if (cg->neg_xi) { 799 /* modified KD implementation */ 800 if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk; 801 else { 802 gamma = cg->xi*gd/dk_yk; 803 } 804 if (beta < cg->zeta*tmp/(dnorm*dnorm)) { 805 beta = cg->zeta*tmp/(dnorm*dnorm); 806 gamma = 0.0; 807 } 808 } else { /* original KD 2015 implementation */ 809 if (beta < cg->zeta*tmp/(dnorm*dnorm)) { 810 beta = cg->zeta*tmp/(dnorm*dnorm); 811 gamma = 0.0; 812 } else { 813 gamma = cg->xi*gd/dk_yk; 814 } 815 } 816 /* Do the update in two steps */ 817 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 818 PetscCall(VecAXPY(tao->stepdirection, gamma, cg->y_work)); 819 } 820 } 821 break; 822 823 case CG_SSML_BFGS: 824 /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory." 825 Discussion Papers 269 (1977). */ 826 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 827 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 828 snorm = step*dnorm; 829 cg->yts = dk_yk*step; 830 cg->yty = ynorm2; 831 cg->sts = snorm*snorm; 832 if (!cg->diag_scaling) { 833 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 834 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 835 tmp = gd/dk_yk; 836 beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp; 837 /* d <- -t*g + beta*t*d + t*tmp*yk */ 838 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk)); 839 } else { 840 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 841 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 842 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 843 /* compute scalar gamma */ 844 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 845 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 846 gamma = gd/dk_yk; 847 /* Compute scalar beta */ 848 beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 849 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 850 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 851 } 852 break; 853 854 case CG_SSML_DFP: 855 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 856 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 857 snorm = step*dnorm; 858 cg->yts = dk_yk*step; 859 cg->yty = ynorm2; 860 cg->sts = snorm*snorm; 861 if (!cg->diag_scaling) { 862 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 863 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 864 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 865 tau_k = cg->dfp_scale*tau_k; 866 tmp = tau_k*gkp1_yk/cg->yty; 867 beta = -step*gd/dk_yk; 868 /* d <- -t*g + beta*d + tmp*yk */ 869 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 870 } else { 871 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */ 872 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 873 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 874 /* compute scalar gamma */ 875 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 876 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 877 gamma = (gkp1_yk/tmp); 878 /* Compute scalar beta */ 879 beta = -step*gd/dk_yk; 880 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 881 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 882 } 883 break; 884 885 case CG_SSML_BROYDEN: 886 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 887 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 888 snorm = step*dnorm; 889 cg->yts = step*dk_yk; 890 cg->yty = ynorm2; 891 cg->sts = snorm*snorm; 892 if (!cg->diag_scaling) { 893 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 894 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale)); 895 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale)); 896 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 897 tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp; 898 /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0, 899 it should reproduce the tau_dfp scaling. Same with dfp_scale. */ 900 tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/cg->yty; 901 beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 902 /* d <- -t*g + beta*d + tmp*yk */ 903 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 904 } else { 905 /* We have diagonal scaling enabled */ 906 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 907 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 908 /* compute scalar gamma */ 909 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 910 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 911 gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp); 912 /* Compute scalar beta */ 913 beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 914 /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 915 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 916 } 917 break; 918 919 default: 920 break; 921 922 } 923 } 924 PetscFunctionReturn(0); 925 } 926 927 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm) 928 { 929 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 930 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 931 PetscReal step=1.0,gnorm2,gd,dnorm=0.0; 932 PetscReal gnorm2_old,f_old,resnorm, gnorm_old; 933 PetscBool pcgd_fallback = PETSC_FALSE; 934 935 PetscFunctionBegin; 936 /* We are now going to perform a line search along the direction. */ 937 /* Store solution and gradient info before it changes */ 938 PetscCall(VecCopy(tao->solution, cg->X_old)); 939 PetscCall(VecCopy(tao->gradient, cg->G_old)); 940 PetscCall(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old)); 941 942 gnorm_old = gnorm; 943 gnorm2_old = gnorm_old*gnorm_old; 944 f_old = cg->f; 945 /* Perform bounded line search. If we are recycling a solution from a previous */ 946 /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */ 947 if (!(tao->recycle && 0 == tao->niter)) { 948 /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */ 949 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 950 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 951 PetscCall(TaoAddLineSearchCounts(tao)); 952 953 /* Check linesearch failure */ 954 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 955 ++cg->ls_fails; 956 if (cg->cg_type == CG_GradientDescent) { 957 /* Nothing left to do but fail out of the optimization */ 958 step = 0.0; 959 tao->reason = TAO_DIVERGED_LS_FAILURE; 960 } else { 961 /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */ 962 PetscCall(VecCopy(cg->X_old, tao->solution)); 963 PetscCall(VecCopy(cg->G_old, tao->gradient)); 964 PetscCall(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient)); 965 gnorm = gnorm_old; 966 gnorm2 = gnorm2_old; 967 cg->f = f_old; 968 969 /* Fall back on preconditioned CG (so long as you're not already using it) */ 970 if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling) { 971 pcgd_fallback = PETSC_TRUE; 972 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 973 974 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 975 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 976 977 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 978 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 979 PetscCall(TaoAddLineSearchCounts(tao)); 980 981 pcgd_fallback = PETSC_FALSE; 982 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 983 /* Going to perform a regular gradient descent step. */ 984 ++cg->ls_fails; 985 step = 0.0; 986 } 987 } 988 /* Fall back on the scaled gradient step */ 989 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 990 ++cg->ls_fails; 991 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 992 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 993 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 994 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 995 PetscCall(TaoAddLineSearchCounts(tao)); 996 } 997 998 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 999 /* Nothing left to do but fail out of the optimization */ 1000 ++cg->ls_fails; 1001 step = 0.0; 1002 tao->reason = TAO_DIVERGED_LS_FAILURE; 1003 } else { 1004 /* One of the fallbacks worked. Set them both back equal to false. */ 1005 pcgd_fallback = PETSC_FALSE; 1006 } 1007 } 1008 } 1009 /* Convergence test for line search failure */ 1010 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1011 1012 /* Standard convergence test */ 1013 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 1014 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 1015 PetscCheck(!PetscIsInfOrNanReal(resnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 1016 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 1017 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 1018 PetscCall((*tao->ops->convergencetest)(tao,tao->cnvP)); 1019 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1020 } 1021 /* Assert we have an updated step and we need at least one more iteration. */ 1022 /* Calculate the next direction */ 1023 /* Estimate the active set at the new solution */ 1024 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 1025 /* Compute the projected gradient and its norm */ 1026 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 1027 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 1028 PetscCall(VecNorm(tao->gradient,NORM_2,&gnorm)); 1029 gnorm2 = gnorm*gnorm; 1030 1031 /* Calculate some quantities used in the StepDirectionUpdate. */ 1032 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 1033 /* Update the step direction. */ 1034 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 1035 ++tao->niter; 1036 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 1037 1038 if (cg->cg_type != CG_GradientDescent) { 1039 /* Figure out which previously active variables became inactive this iteration */ 1040 PetscCall(ISDestroy(&cg->new_inactives)); 1041 if (cg->inactive_idx && cg->inactive_old) { 1042 PetscCall(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives)); 1043 } 1044 /* Selectively reset the CG step those freshly inactive variables */ 1045 if (cg->new_inactives) { 1046 PetscCall(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 1047 PetscCall(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 1048 PetscCall(VecCopy(cg->inactive_grad, cg->inactive_step)); 1049 PetscCall(VecScale(cg->inactive_step, -1.0)); 1050 PetscCall(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 1051 PetscCall(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 1052 } 1053 /* Verify that this is a descent direction */ 1054 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 1055 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 1056 if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) { 1057 /* Not a descent direction, so we reset back to projected gradient descent */ 1058 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 1059 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 1060 ++cg->descent_error; 1061 } else { 1062 } 1063 } 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0) 1068 { 1069 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 1070 1071 PetscFunctionBegin; 1072 PetscCall(PetscObjectReference((PetscObject)H0)); 1073 cg->pc = H0; 1074 PetscFunctionReturn(0); 1075 } 1076