1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/ 3 #include <petscksp.h> 4 5 #define CG_GradientDescent 0 6 #define CG_HestenesStiefel 1 7 #define CG_FletcherReeves 2 8 #define CG_PolakRibierePolyak 3 9 #define CG_PolakRibierePlus 4 10 #define CG_DaiYuan 5 11 #define CG_HagerZhang 6 12 #define CG_DaiKou 7 13 #define CG_KouDai 8 14 #define CG_SSML_BFGS 9 15 #define CG_SSML_DFP 10 16 #define CG_SSML_BROYDEN 11 17 #define CG_PCGradientDescent 12 18 #define CGTypes 13 19 20 static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"}; 21 22 #define CG_AS_NONE 0 23 #define CG_AS_BERTSEKAS 1 24 #define CG_AS_SIZE 2 25 26 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 27 28 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) 29 { 30 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 31 32 PetscFunctionBegin; 33 PetscCall(ISDestroy(&cg->inactive_old)); 34 if (cg->inactive_idx) { 35 PetscCall(ISDuplicate(cg->inactive_idx, &cg->inactive_old)); 36 PetscCall(ISCopy(cg->inactive_idx, cg->inactive_old)); 37 } 38 switch (asType) { 39 case CG_AS_NONE: 40 PetscCall(ISDestroy(&cg->inactive_idx)); 41 PetscCall(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx)); 42 PetscCall(ISDestroy(&cg->active_idx)); 43 PetscCall(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx)); 44 break; 45 46 case CG_AS_BERTSEKAS: 47 /* Use gradient descent to estimate the active set */ 48 PetscCall(VecCopy(cg->unprojected_gradient, cg->W)); 49 PetscCall(VecScale(cg->W, -1.0)); 50 PetscCall(TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, 51 &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx)); 52 break; 53 default: 54 break; 55 } 56 PetscFunctionReturn(0); 57 } 58 59 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step) 60 { 61 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 62 63 PetscFunctionBegin; 64 switch (asType) { 65 case CG_AS_NONE: 66 PetscCall(VecISSet(step, cg->active_idx, 0.0)); 67 break; 68 69 case CG_AS_BERTSEKAS: 70 PetscCall(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step)); 71 break; 72 73 default: 74 break; 75 } 76 PetscFunctionReturn(0); 77 } 78 79 static PetscErrorCode TaoSolve_BNCG(Tao tao) 80 { 81 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 82 PetscReal step=1.0,gnorm,gnorm2, resnorm; 83 PetscInt nDiff; 84 85 PetscFunctionBegin; 86 /* Project the current point onto the feasible set */ 87 PetscCall(TaoComputeVariableBounds(tao)); 88 PetscCall(TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU)); 89 90 /* Project the initial point onto the feasible region */ 91 PetscCall(TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution)); 92 93 if (nDiff > 0 || !tao->recycle) { 94 PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); 95 } 96 PetscCall(VecNorm(cg->unprojected_gradient,NORM_2,&gnorm)); 97 PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 98 99 /* Estimate the active set and compute the projected gradient */ 100 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 101 102 /* Project the gradient and calculate the norm */ 103 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 104 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 105 PetscCall(VecNorm(tao->gradient,NORM_2,&gnorm)); 106 gnorm2 = gnorm*gnorm; 107 108 /* Initialize counters */ 109 tao->niter = 0; 110 cg->ls_fails = cg->descent_error = 0; 111 cg->resets = -1; 112 cg->skipped_updates = cg->pure_gd_steps = 0; 113 cg->iter_quad = 0; 114 115 /* Convergence test at the starting point. */ 116 tao->reason = TAO_CONTINUE_ITERATING; 117 118 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 119 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 120 PetscCheck(!PetscIsInfOrNanReal(resnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 121 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 122 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 123 PetscCall((*tao->ops->convergencetest)(tao,tao->cnvP)); 124 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 125 /* Calculate initial direction. */ 126 if (!tao->recycle) { 127 /* We are not recycling a solution/history from a past TaoSolve */ 128 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 129 } 130 /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */ 131 while (1) { 132 /* Call general purpose update function */ 133 if (tao->ops->update) { 134 PetscCall((*tao->ops->update)(tao, tao->niter, tao->user_update)); 135 PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); 136 } 137 PetscCall(TaoBNCGConductIteration(tao, gnorm)); 138 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 139 } 140 } 141 142 static PetscErrorCode TaoSetUp_BNCG(Tao tao) 143 { 144 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 145 146 PetscFunctionBegin; 147 if (!tao->gradient) { 148 PetscCall(VecDuplicate(tao->solution,&tao->gradient)); 149 } 150 if (!tao->stepdirection) { 151 PetscCall(VecDuplicate(tao->solution,&tao->stepdirection)); 152 } 153 if (!cg->W) { 154 PetscCall(VecDuplicate(tao->solution,&cg->W)); 155 } 156 if (!cg->work) { 157 PetscCall(VecDuplicate(tao->solution,&cg->work)); 158 } 159 if (!cg->sk) { 160 PetscCall(VecDuplicate(tao->solution,&cg->sk)); 161 } 162 if (!cg->yk) { 163 PetscCall(VecDuplicate(tao->gradient,&cg->yk)); 164 } 165 if (!cg->X_old) { 166 PetscCall(VecDuplicate(tao->solution,&cg->X_old)); 167 } 168 if (!cg->G_old) { 169 PetscCall(VecDuplicate(tao->gradient,&cg->G_old)); 170 } 171 if (cg->diag_scaling) { 172 PetscCall(VecDuplicate(tao->solution,&cg->d_work)); 173 PetscCall(VecDuplicate(tao->solution,&cg->y_work)); 174 PetscCall(VecDuplicate(tao->solution,&cg->g_work)); 175 } 176 if (!cg->unprojected_gradient) { 177 PetscCall(VecDuplicate(tao->gradient,&cg->unprojected_gradient)); 178 } 179 if (!cg->unprojected_gradient_old) { 180 PetscCall(VecDuplicate(tao->gradient,&cg->unprojected_gradient_old)); 181 } 182 PetscCall(MatLMVMAllocate(cg->B, cg->sk, cg->yk)); 183 if (cg->pc) { 184 PetscCall(MatLMVMSetJ0(cg->B, cg->pc)); 185 } 186 PetscFunctionReturn(0); 187 } 188 189 static PetscErrorCode TaoDestroy_BNCG(Tao tao) 190 { 191 TAO_BNCG *cg = (TAO_BNCG*) tao->data; 192 193 PetscFunctionBegin; 194 if (tao->setupcalled) { 195 PetscCall(VecDestroy(&cg->W)); 196 PetscCall(VecDestroy(&cg->work)); 197 PetscCall(VecDestroy(&cg->X_old)); 198 PetscCall(VecDestroy(&cg->G_old)); 199 PetscCall(VecDestroy(&cg->unprojected_gradient)); 200 PetscCall(VecDestroy(&cg->unprojected_gradient_old)); 201 PetscCall(VecDestroy(&cg->g_work)); 202 PetscCall(VecDestroy(&cg->d_work)); 203 PetscCall(VecDestroy(&cg->y_work)); 204 PetscCall(VecDestroy(&cg->sk)); 205 PetscCall(VecDestroy(&cg->yk)); 206 } 207 PetscCall(ISDestroy(&cg->active_lower)); 208 PetscCall(ISDestroy(&cg->active_upper)); 209 PetscCall(ISDestroy(&cg->active_fixed)); 210 PetscCall(ISDestroy(&cg->active_idx)); 211 PetscCall(ISDestroy(&cg->inactive_idx)); 212 PetscCall(ISDestroy(&cg->inactive_old)); 213 PetscCall(ISDestroy(&cg->new_inactives)); 214 PetscCall(MatDestroy(&cg->B)); 215 if (cg->pc) { 216 PetscCall(MatDestroy(&cg->pc)); 217 } 218 PetscCall(PetscFree(tao->data)); 219 PetscFunctionReturn(0); 220 } 221 222 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao) 223 { 224 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 225 226 PetscFunctionBegin; 227 PetscOptionsHeadBegin(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization"); 228 PetscCall(PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL)); 229 if (cg->cg_type != CG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */ 230 if (CG_GradientDescent == cg->cg_type) { 231 cg->cg_type = CG_PCGradientDescent; 232 /* Set scaling equal to none or, at best, scalar scaling. */ 233 cg->unscaled_restart = PETSC_TRUE; 234 cg->diag_scaling = PETSC_FALSE; 235 } 236 PetscCall(PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL)); 237 PetscCall(PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL)); 238 PetscCall(PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL)); 239 PetscCall(PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL)); 240 PetscCall(PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL)); 241 PetscCall(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL)); 242 PetscCall(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL)); 243 PetscCall(PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL)); 244 PetscCall(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL)); 245 PetscCall(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL)); 246 PetscCall(PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL)); 247 PetscCall(PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL)); 248 PetscCall(PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL)); 249 PetscCall(PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL)); 250 PetscCall(PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL)); 251 PetscCall(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL)); 252 PetscCall(PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL)); 253 PetscCall(PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL)); 254 if (cg->no_scaling) { 255 cg->diag_scaling = PETSC_FALSE; 256 cg->alpha = -1.0; 257 } 258 if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling) { /* Some more default options that appear to be good. */ 259 cg->neg_xi = PETSC_TRUE; 260 } 261 PetscCall(PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL)); 262 PetscCall(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL)); 263 PetscCall(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL)); 264 PetscCall(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL)); 265 PetscCall(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL)); 266 267 PetscOptionsHeadEnd(); 268 PetscCall(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix)); 269 PetscCall(MatAppendOptionsPrefix(cg->B, "tao_bncg_")); 270 PetscCall(MatSetFromOptions(cg->B)); 271 PetscFunctionReturn(0); 272 } 273 274 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) 275 { 276 PetscBool isascii; 277 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 278 279 PetscFunctionBegin; 280 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 281 if (isascii) { 282 PetscCall(PetscViewerASCIIPushTab(viewer)); 283 PetscCall(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type])); 284 PetscCall(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %" PetscInt_FMT "\n", cg->skipped_updates)); 285 PetscCall(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %" PetscInt_FMT "\n", cg->resets)); 286 PetscCall(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %" PetscInt_FMT "\n", cg->pure_gd_steps)); 287 PetscCall(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %" PetscInt_FMT "\n", cg->descent_error)); 288 PetscCall(PetscViewerASCIIPrintf(viewer, "Line search fails: %" PetscInt_FMT "\n", cg->ls_fails)); 289 if (cg->diag_scaling) { 290 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); 291 if (isascii) { 292 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 293 PetscCall(MatView(cg->B, viewer)); 294 PetscCall(PetscViewerPopFormat(viewer)); 295 } 296 } 297 PetscCall(PetscViewerASCIIPopTab(viewer)); 298 } 299 PetscFunctionReturn(0); 300 } 301 302 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha) 303 { 304 PetscReal a, b, c, sig1, sig2; 305 306 PetscFunctionBegin; 307 *scale = 0.0; 308 if (1.0 == alpha) *scale = yts/yty; 309 else if (0.0 == alpha) *scale = sts/yts; 310 else if (-1.0 == alpha) *scale = 1.0; 311 else { 312 a = yty; 313 b = yts; 314 c = sts; 315 a *= alpha; 316 b *= -(2.0*alpha - 1.0); 317 c *= alpha - 1.0; 318 sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 319 sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 320 /* accept the positive root as the scalar */ 321 if (sig1 > 0.0) *scale = sig1; 322 else if (sig2 > 0.0) *scale = sig2; 323 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar"); 324 } 325 PetscFunctionReturn(0); 326 } 327 328 /*MC 329 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 330 331 Options Database Keys: 332 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled) 333 . -tao_bncg_eta <r> - restart tolerance 334 . -tao_bncg_type <taocg_type> - cg formula 335 . -tao_bncg_as_type <none,bertsekas> - active set estimation method 336 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation 337 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation 338 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision. 339 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method 340 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 341 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 342 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method 343 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method 344 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method 345 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method 346 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True. 347 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods 348 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts 349 . -tao_bncg_zeta <r> - Scaling parameter in the KD method 350 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps 351 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps 352 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart 353 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem 354 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations 355 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults. 356 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions 357 358 Notes: 359 CG formulas are: 360 + "gd" - Gradient Descent 361 . "fr" - Fletcher-Reeves 362 . "pr" - Polak-Ribiere-Polyak 363 . "prp" - Polak-Ribiere-Plus 364 . "hs" - Hestenes-Steifel 365 . "dy" - Dai-Yuan 366 . "ssml_bfgs" - Self-Scaling Memoryless BFGS 367 . "ssml_dfp" - Self-Scaling Memoryless DFP 368 . "ssml_brdn" - Self-Scaling Memoryless Broyden 369 . "hz" - Hager-Zhang (CG_DESCENT 5.3) 370 . "dk" - Dai-Kou (2013) 371 - "kd" - Kou-Dai (2015) 372 373 Level: beginner 374 M*/ 375 376 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 377 { 378 TAO_BNCG *cg; 379 const char *morethuente_type = TAOLINESEARCHMT; 380 381 PetscFunctionBegin; 382 tao->ops->setup = TaoSetUp_BNCG; 383 tao->ops->solve = TaoSolve_BNCG; 384 tao->ops->view = TaoView_BNCG; 385 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 386 tao->ops->destroy = TaoDestroy_BNCG; 387 388 /* Override default settings (unless already changed) */ 389 if (!tao->max_it_changed) tao->max_it = 2000; 390 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 391 392 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 393 /* method. In particular, gtol should be less that 0.5; the value used in */ 394 /* Nocedal and Wright is 0.10. We use the default values for the */ 395 /* linesearch because it seems to work better. */ 396 PetscCall(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch)); 397 PetscCall(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1)); 398 PetscCall(TaoLineSearchSetType(tao->linesearch, morethuente_type)); 399 PetscCall(TaoLineSearchUseTaoRoutines(tao->linesearch, tao)); 400 401 PetscCall(PetscNewLog(tao,&cg)); 402 tao->data = (void*)cg; 403 PetscCall(KSPInitializePackage()); 404 PetscCall(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B)); 405 PetscCall(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1)); 406 PetscCall(MatSetType(cg->B, MATLMVMDIAGBROYDEN)); 407 408 cg->pc = NULL; 409 410 cg->dk_eta = 0.5; 411 cg->hz_eta = 0.4; 412 cg->dynamic_restart = PETSC_FALSE; 413 cg->unscaled_restart = PETSC_FALSE; 414 cg->no_scaling = PETSC_FALSE; 415 cg->delta_min = 1e-7; 416 cg->delta_max = 100; 417 cg->theta = 1.0; 418 cg->hz_theta = 1.0; 419 cg->dfp_scale = 1.0; 420 cg->bfgs_scale = 1.0; 421 cg->zeta = 0.1; 422 cg->min_quad = 6; 423 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 424 cg->xi = 1.0; 425 cg->neg_xi = PETSC_TRUE; 426 cg->spaced_restart = PETSC_FALSE; 427 cg->tol_quad = 1e-8; 428 cg->as_step = 0.001; 429 cg->as_tol = 0.001; 430 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/ 431 cg->as_type = CG_AS_BERTSEKAS; 432 cg->cg_type = CG_SSML_BFGS; 433 cg->alpha = 1.0; 434 cg->diag_scaling = PETSC_TRUE; 435 PetscFunctionReturn(0); 436 } 437 438 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) 439 { 440 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 441 PetscReal scaling; 442 443 PetscFunctionBegin; 444 ++cg->resets; 445 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 446 scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling)); 447 if (cg->unscaled_restart) { 448 scaling = 1.0; 449 ++cg->pure_gd_steps; 450 } 451 PetscCall(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient)); 452 /* Also want to reset our diagonal scaling with each restart */ 453 if (cg->diag_scaling) { 454 PetscCall(MatLMVMReset(cg->B, PETSC_FALSE)); 455 } 456 PetscFunctionReturn(0); 457 } 458 459 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) 460 { 461 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 462 PetscReal quadinterp; 463 464 PetscFunctionBegin; 465 if (cg->f < cg->min_quad/10) { 466 *dynrestart = PETSC_FALSE; 467 PetscFunctionReturn(0); 468 } /* just skip this since this strategy doesn't work well for functions near zero */ 469 quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old)); 470 if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad; 471 else { 472 cg->iter_quad = 0; 473 *dynrestart = PETSC_FALSE; 474 } 475 if (cg->iter_quad >= cg->min_quad) { 476 cg->iter_quad = 0; 477 *dynrestart = PETSC_TRUE; 478 } 479 PetscFunctionReturn(0); 480 } 481 482 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback) 483 { 484 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 485 PetscReal gamma = 1.0, tau_k, beta; 486 PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk=1.0, gd; 487 PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg; 488 PetscInt dim; 489 PetscBool cg_restart = PETSC_FALSE; 490 PetscFunctionBegin; 491 492 /* Local curvature check to see if we need to restart */ 493 if (tao->niter >= 1 || tao->recycle) { 494 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 495 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 496 ynorm2 = ynorm*ynorm; 497 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 498 if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON) { 499 cg_restart = PETSC_TRUE; 500 ++cg->skipped_updates; 501 } 502 if (cg->spaced_restart) { 503 PetscCall(VecGetSize(tao->gradient, &dim)); 504 if (tao->niter % (dim*cg->min_restart_num)) cg_restart = PETSC_TRUE; 505 } 506 } 507 /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */ 508 if (cg->spaced_restart) { 509 PetscCall(VecGetSize(tao->gradient, &dim)); 510 if (0 == tao->niter % (6*dim)) cg_restart = PETSC_TRUE; 511 } 512 /* Compute the diagonal scaling vector if applicable */ 513 if (cg->diag_scaling) { 514 PetscCall(MatLMVMUpdate(cg->B, tao->solution, tao->gradient)); 515 } 516 517 /* A note on diagonal scaling (to be added to paper): 518 For the FR, PR, PRP, and DY methods, the diagonally scaled versions 519 must be derived as a preconditioned CG method rather than as 520 a Hessian initialization like in the Broyden methods. */ 521 522 /* In that case, one writes the objective function as 523 f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k). 524 Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to 525 HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the 526 same under preconditioning. Note that A is diagonal, such that A^T = A. */ 527 528 /* This yields questions like what the dot product d_k^T y_k 529 should look like. HZ mistakenly treats that as the same under 530 preconditioning, but that is not necessarily true. */ 531 532 /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation, 533 we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}), 534 yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is 535 NOT the same if our preconditioning matrix is updated between iterations. 536 This same issue is found when considering dot products of the form g_{k+1}^T y_k. */ 537 538 /* Compute CG step direction */ 539 if (cg_restart) { 540 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 541 } else if (pcgd_fallback) { 542 /* Just like preconditioned CG */ 543 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 544 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 545 } else if (ynorm2 > PETSC_MACHINE_EPSILON) { 546 switch (cg->cg_type) { 547 case CG_PCGradientDescent: 548 if (!cg->diag_scaling) { 549 if (!cg->no_scaling) { 550 cg->sts = step*step*dnorm*dnorm; 551 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha)); 552 } else { 553 tau_k = 1.0; 554 ++cg->pure_gd_steps; 555 } 556 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient)); 557 } else { 558 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 559 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 560 } 561 break; 562 563 case CG_HestenesStiefel: 564 /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */ 565 if (!cg->diag_scaling) { 566 cg->sts = step*step*dnorm*dnorm; 567 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 568 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha)); 569 beta = tau_k*gkp1_yk/dk_yk; 570 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 571 } else { 572 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 573 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 574 beta = gkp1_yk/dk_yk; 575 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 576 } 577 break; 578 579 case CG_FletcherReeves: 580 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 581 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 582 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 583 ynorm2 = ynorm*ynorm; 584 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 585 if (!cg->diag_scaling) { 586 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha)); 587 beta = tau_k*gnorm2/gnorm2_old; 588 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 589 } else { 590 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */ 591 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 592 PetscCall(VecDot(tao->gradient, cg->g_work, &tmp)); 593 beta = tmp/gnorm2_old; 594 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 595 } 596 break; 597 598 case CG_PolakRibierePolyak: 599 snorm = step*dnorm; 600 if (!cg->diag_scaling) { 601 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 602 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 603 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha)); 604 beta = tau_k*gkp1_yk/gnorm2_old; 605 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 606 } else { 607 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); 608 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 609 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 610 beta = gkp1_yk/gnorm2_old; 611 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 612 } 613 break; 614 615 case CG_PolakRibierePlus: 616 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 617 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 618 ynorm2 = ynorm*ynorm; 619 if (!cg->diag_scaling) { 620 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 621 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 622 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha)); 623 beta = tau_k*gkp1_yk/gnorm2_old; 624 beta = PetscMax(beta, 0.0); 625 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 626 } else { 627 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */ 628 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 629 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 630 beta = gkp1_yk/gnorm2_old; 631 beta = PetscMax(beta, 0.0); 632 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 633 } 634 break; 635 636 case CG_DaiYuan: 637 /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property." 638 SIAM Journal on optimization 10, no. 1 (1999): 177-182. */ 639 if (!cg->diag_scaling) { 640 PetscCall(VecDot(tao->stepdirection, tao->gradient, &gd)); 641 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 642 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha)); 643 beta = tau_k*gnorm2/(gd - gd_old); 644 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 645 } else { 646 PetscCall(MatMult(cg->B, tao->stepdirection, cg->d_work)); 647 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 648 PetscCall(VecDot(cg->g_work, tao->gradient, >Dg)); 649 PetscCall(VecDot(tao->stepdirection, cg->G_old, &gd_old)); 650 PetscCall(VecDot(cg->d_work, cg->g_work, &dk_yk)); 651 dk_yk = dk_yk - gd_old; 652 beta = gtDg/dk_yk; 653 PetscCall(VecScale(cg->d_work, beta)); 654 PetscCall(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work)); 655 } 656 break; 657 658 case CG_HagerZhang: 659 /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent." 660 ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */ 661 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 662 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 663 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 664 snorm = dnorm*step; 665 cg->yts = step*dk_yk; 666 if (cg->use_dynamic_restart) { 667 PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 668 } 669 if (cg->dynamic_restart) { 670 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 671 } else { 672 if (!cg->diag_scaling) { 673 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 674 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 675 /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */ 676 tmp = gd/dk_yk; 677 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)); 678 /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */ 679 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 680 /* d <- -t*g + beta*t*d */ 681 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 682 } else { 683 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 684 cg->yty = ynorm2; 685 cg->sts = snorm*snorm; 686 /* Apply the diagonal scaling to all my vectors */ 687 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 688 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 689 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 690 /* Construct the constant ytDgkp1 */ 691 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 692 /* Construct the constant for scaling Dkyk in the update */ 693 tmp = gd/dk_yk; 694 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 695 tau_k = -tau_k*gd/(dk_yk*dk_yk); 696 /* beta is the constant which adds the dk contribution */ 697 beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */ 698 /* From HZ2013, modified to account for diagonal scaling*/ 699 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 700 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 701 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 702 /* Do the update */ 703 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 704 } 705 } 706 break; 707 708 case CG_DaiKou: 709 /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search." 710 SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */ 711 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 712 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 713 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 714 snorm = step*dnorm; 715 cg->yts = dk_yk*step; 716 if (!cg->diag_scaling) { 717 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 718 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 719 /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */ 720 tmp = gd/dk_yk; 721 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk; 722 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 723 /* d <- -t*g + beta*t*d */ 724 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk)); 725 } else { 726 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 727 cg->yty = ynorm2; 728 cg->sts = snorm*snorm; 729 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 730 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 731 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 732 /* Construct the constant ytDgkp1 */ 733 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 734 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 735 tau_k = tau_k*gd/(dk_yk*dk_yk); 736 tmp = gd/dk_yk; 737 /* beta is the constant which adds the dk contribution */ 738 beta = gkp1_yk/dk_yk - step*tmp - tau_k; 739 /* Update this for the last term in beta */ 740 PetscCall(VecDot(cg->y_work, tao->stepdirection, &dk_yk)); 741 beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */ 742 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 743 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 744 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 745 /* Do the update */ 746 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 747 } 748 break; 749 750 case CG_KouDai: 751 /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization." 752 Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */ 753 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 754 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 755 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 756 snorm = step*dnorm; 757 cg->yts = dk_yk*step; 758 if (cg->use_dynamic_restart) { 759 PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 760 } 761 if (cg->dynamic_restart) { 762 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 763 } else { 764 if (!cg->diag_scaling) { 765 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 766 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 767 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 768 if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) /* 0.1 is KD's zeta parameter */ 769 { 770 beta = cg->zeta*tau_k*gd/(dnorm*dnorm); 771 gamma = 0.0; 772 } else { 773 if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk; 774 /* This seems to be very effective when there's no tau_k scaling. 775 This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */ 776 else { 777 gamma = cg->xi*gd/dk_yk; 778 } 779 } 780 /* d <- -t*g + beta*t*d + t*tmp*yk */ 781 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk)); 782 } else { 783 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 784 cg->yty = ynorm2; 785 cg->sts = snorm*snorm; 786 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 787 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 788 /* Construct the constant ytDgkp1 */ 789 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1D_yk)); 790 /* Construct the constant for scaling Dkyk in the update */ 791 gamma = gd/dk_yk; 792 /* tau_k = -ytDy/(ytd)^2 * gd */ 793 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 794 tau_k = tau_k*gd/(dk_yk*dk_yk); 795 /* beta is the constant which adds the d_k contribution */ 796 beta = gkp1D_yk/dk_yk - step*gamma - tau_k; 797 /* Here is the requisite check */ 798 PetscCall(VecDot(tao->stepdirection, cg->g_work, &tmp)); 799 if (cg->neg_xi) { 800 /* modified KD implementation */ 801 if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk; 802 else { 803 gamma = cg->xi*gd/dk_yk; 804 } 805 if (beta < cg->zeta*tmp/(dnorm*dnorm)) { 806 beta = cg->zeta*tmp/(dnorm*dnorm); 807 gamma = 0.0; 808 } 809 } else { /* original KD 2015 implementation */ 810 if (beta < cg->zeta*tmp/(dnorm*dnorm)) { 811 beta = cg->zeta*tmp/(dnorm*dnorm); 812 gamma = 0.0; 813 } else { 814 gamma = cg->xi*gd/dk_yk; 815 } 816 } 817 /* Do the update in two steps */ 818 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 819 PetscCall(VecAXPY(tao->stepdirection, gamma, cg->y_work)); 820 } 821 } 822 break; 823 824 case CG_SSML_BFGS: 825 /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory." 826 Discussion Papers 269 (1977). */ 827 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 828 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 829 snorm = step*dnorm; 830 cg->yts = dk_yk*step; 831 cg->yty = ynorm2; 832 cg->sts = snorm*snorm; 833 if (!cg->diag_scaling) { 834 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 835 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 836 tmp = gd/dk_yk; 837 beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp; 838 /* d <- -t*g + beta*t*d + t*tmp*yk */ 839 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk)); 840 } else { 841 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 842 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 843 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 844 /* compute scalar gamma */ 845 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 846 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 847 gamma = gd/dk_yk; 848 /* Compute scalar beta */ 849 beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 850 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 851 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 852 } 853 break; 854 855 case CG_SSML_DFP: 856 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 857 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 858 snorm = step*dnorm; 859 cg->yts = dk_yk*step; 860 cg->yty = ynorm2; 861 cg->sts = snorm*snorm; 862 if (!cg->diag_scaling) { 863 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 864 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 865 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 866 tau_k = cg->dfp_scale*tau_k; 867 tmp = tau_k*gkp1_yk/cg->yty; 868 beta = -step*gd/dk_yk; 869 /* d <- -t*g + beta*d + tmp*yk */ 870 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 871 } else { 872 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */ 873 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 874 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 875 /* compute scalar gamma */ 876 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 877 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 878 gamma = (gkp1_yk/tmp); 879 /* Compute scalar beta */ 880 beta = -step*gd/dk_yk; 881 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 882 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 883 } 884 break; 885 886 case CG_SSML_BROYDEN: 887 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 888 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 889 snorm = step*dnorm; 890 cg->yts = step*dk_yk; 891 cg->yty = ynorm2; 892 cg->sts = snorm*snorm; 893 if (!cg->diag_scaling) { 894 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 895 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale)); 896 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale)); 897 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 898 tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp; 899 /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0, 900 it should reproduce the tau_dfp scaling. Same with dfp_scale. */ 901 tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/cg->yty; 902 beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 903 /* d <- -t*g + beta*d + tmp*yk */ 904 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 905 } else { 906 /* We have diagonal scaling enabled */ 907 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 908 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 909 /* compute scalar gamma */ 910 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 911 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 912 gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp); 913 /* Compute scalar beta */ 914 beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 915 /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 916 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 917 } 918 break; 919 920 default: 921 break; 922 923 } 924 } 925 PetscFunctionReturn(0); 926 } 927 928 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm) 929 { 930 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 931 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 932 PetscReal step=1.0,gnorm2,gd,dnorm=0.0; 933 PetscReal gnorm2_old,f_old,resnorm, gnorm_old; 934 PetscBool pcgd_fallback = PETSC_FALSE; 935 936 PetscFunctionBegin; 937 /* We are now going to perform a line search along the direction. */ 938 /* Store solution and gradient info before it changes */ 939 PetscCall(VecCopy(tao->solution, cg->X_old)); 940 PetscCall(VecCopy(tao->gradient, cg->G_old)); 941 PetscCall(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old)); 942 943 gnorm_old = gnorm; 944 gnorm2_old = gnorm_old*gnorm_old; 945 f_old = cg->f; 946 /* Perform bounded line search. If we are recycling a solution from a previous */ 947 /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */ 948 if (!(tao->recycle && 0 == tao->niter)) { 949 /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */ 950 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 951 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 952 PetscCall(TaoAddLineSearchCounts(tao)); 953 954 /* Check linesearch failure */ 955 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 956 ++cg->ls_fails; 957 if (cg->cg_type == CG_GradientDescent) { 958 /* Nothing left to do but fail out of the optimization */ 959 step = 0.0; 960 tao->reason = TAO_DIVERGED_LS_FAILURE; 961 } else { 962 /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */ 963 PetscCall(VecCopy(cg->X_old, tao->solution)); 964 PetscCall(VecCopy(cg->G_old, tao->gradient)); 965 PetscCall(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient)); 966 gnorm = gnorm_old; 967 gnorm2 = gnorm2_old; 968 cg->f = f_old; 969 970 /* Fall back on preconditioned CG (so long as you're not already using it) */ 971 if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling) { 972 pcgd_fallback = PETSC_TRUE; 973 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 974 975 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 976 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 977 978 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 979 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 980 PetscCall(TaoAddLineSearchCounts(tao)); 981 982 pcgd_fallback = PETSC_FALSE; 983 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 984 /* Going to perform a regular gradient descent step. */ 985 ++cg->ls_fails; 986 step = 0.0; 987 } 988 } 989 /* Fall back on the scaled gradient step */ 990 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 991 ++cg->ls_fails; 992 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 993 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 994 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 995 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 996 PetscCall(TaoAddLineSearchCounts(tao)); 997 } 998 999 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 1000 /* Nothing left to do but fail out of the optimization */ 1001 ++cg->ls_fails; 1002 step = 0.0; 1003 tao->reason = TAO_DIVERGED_LS_FAILURE; 1004 } else { 1005 /* One of the fallbacks worked. Set them both back equal to false. */ 1006 pcgd_fallback = PETSC_FALSE; 1007 } 1008 } 1009 } 1010 /* Convergence test for line search failure */ 1011 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1012 1013 /* Standard convergence test */ 1014 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 1015 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 1016 PetscCheck(!PetscIsInfOrNanReal(resnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 1017 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 1018 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 1019 PetscCall((*tao->ops->convergencetest)(tao,tao->cnvP)); 1020 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1021 } 1022 /* Assert we have an updated step and we need at least one more iteration. */ 1023 /* Calculate the next direction */ 1024 /* Estimate the active set at the new solution */ 1025 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 1026 /* Compute the projected gradient and its norm */ 1027 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 1028 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 1029 PetscCall(VecNorm(tao->gradient,NORM_2,&gnorm)); 1030 gnorm2 = gnorm*gnorm; 1031 1032 /* Calculate some quantities used in the StepDirectionUpdate. */ 1033 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 1034 /* Update the step direction. */ 1035 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 1036 ++tao->niter; 1037 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 1038 1039 if (cg->cg_type != CG_GradientDescent) { 1040 /* Figure out which previously active variables became inactive this iteration */ 1041 PetscCall(ISDestroy(&cg->new_inactives)); 1042 if (cg->inactive_idx && cg->inactive_old) { 1043 PetscCall(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives)); 1044 } 1045 /* Selectively reset the CG step those freshly inactive variables */ 1046 if (cg->new_inactives) { 1047 PetscCall(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 1048 PetscCall(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 1049 PetscCall(VecCopy(cg->inactive_grad, cg->inactive_step)); 1050 PetscCall(VecScale(cg->inactive_step, -1.0)); 1051 PetscCall(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 1052 PetscCall(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 1053 } 1054 /* Verify that this is a descent direction */ 1055 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 1056 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 1057 if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) { 1058 /* Not a descent direction, so we reset back to projected gradient descent */ 1059 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 1060 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 1061 ++cg->descent_error; 1062 } else { 1063 } 1064 } 1065 PetscFunctionReturn(0); 1066 } 1067 1068 PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0) 1069 { 1070 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 1071 1072 PetscFunctionBegin; 1073 PetscCall(PetscObjectReference((PetscObject)H0)); 1074 cg->pc = H0; 1075 PetscFunctionReturn(0); 1076 } 1077