#include <petsctaolinesearch.h>
#include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/
#include <petscksp.h>

#define CG_GradientDescent    0
#define CG_HestenesStiefel    1
#define CG_FletcherReeves     2
#define CG_PolakRibierePolyak 3
#define CG_PolakRibierePlus   4
#define CG_DaiYuan            5
#define CG_HagerZhang         6
#define CG_DaiKou             7
#define CG_KouDai             8
#define CG_SSML_BFGS          9
#define CG_SSML_DFP           10
#define CG_SSML_BROYDEN       11
#define CG_PCGradientDescent  12
#define CGTypes               13

static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"};

#define CG_AS_NONE      0
#define CG_AS_BERTSEKAS 1
#define CG_AS_SIZE      2

static const char *CG_AS_TYPE[64] = {"none", "bertsekas"};

PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscCall(ISDestroy(&cg->inactive_old));
  if (cg->inactive_idx) {
    PetscCall(ISDuplicate(cg->inactive_idx, &cg->inactive_old));
    PetscCall(ISCopy(cg->inactive_idx, cg->inactive_old));
  }
  switch (asType) {
  case CG_AS_NONE:
    PetscCall(ISDestroy(&cg->inactive_idx));
    PetscCall(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx));
    PetscCall(ISDestroy(&cg->active_idx));
    PetscCall(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx));
    break;

  case CG_AS_BERTSEKAS:
    /* Use gradient descent to estimate the active set */
    PetscCall(VecCopy(cg->unprojected_gradient, cg->W));
    PetscCall(VecScale(cg->W, -1.0));
    PetscCall(TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol,
                                      &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx));
    break;

  default:
    break;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  switch (asType) {
  case CG_AS_NONE:
    PetscCall(VecISSet(step, cg->active_idx, 0.0));
    break;

  case CG_AS_BERTSEKAS:
    PetscCall(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step));
    break;

  default:
    break;
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode TaoSolve_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscReal step = 1.0, gnorm, gnorm2, resnorm;
  PetscInt  nDiff;

  PetscFunctionBegin;
  /* Project the current point onto the feasible set */
  PetscCall(TaoComputeVariableBounds(tao));
  PetscCall(TaoLineSearchSetVariableBounds(tao->linesearch, tao->XL, tao->XU));

  /* Project the initial point onto the feasible region */
  PetscCall(TaoBoundSolution(tao->solution, tao->XL, tao->XU, 0.0, &nDiff, tao->solution));

  if (nDiff > 0 || !tao->recycle) {
    PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient));
  }
  PetscCall(VecNorm(cg->unprojected_gradient, NORM_2, &gnorm));
  PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN");

  /* Estimate the active set and compute the projected gradient */
  PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type));
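  /* The CG update acts only on the inactive variables: gradient components in the
     estimated active set are zeroed out below so they contribute neither to the
     step direction nor to its norm. */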
  /* Project the gradient and calculate the norm */
  PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient));
  PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0));
  PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
  gnorm2 = gnorm * gnorm;

  /* Initialize counters */
  tao->niter = 0;
  cg->ls_fails = cg->descent_error = 0;
  cg->resets = -1;
  cg->skipped_updates = cg->pure_gd_steps = 0;
  cg->iter_quad = 0;

  /* Convergence test at the starting point. */
  tao->reason = TAO_CONTINUE_ITERATING;

  PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
  PetscCall(VecNorm(cg->W, NORM_2, &resnorm));
  PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
  PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
  PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
  PetscCall((*tao->ops->convergencetest)(tao, tao->cnvP));
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);

  /* Calculate initial direction. */
  if (!tao->recycle) {
    /* We are not recycling a solution/history from a past TaoSolve */
    PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
  }
  /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */
  while (1) {
    /* Call general purpose update function */
    if (tao->ops->update) PetscCall((*tao->ops->update)(tao, tao->niter, tao->user_update));
    PetscCall(TaoBNCGConductIteration(tao, gnorm));
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  }
}

static PetscErrorCode TaoSetUp_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
  if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection));
  if (!cg->W) PetscCall(VecDuplicate(tao->solution, &cg->W));
  if (!cg->work) PetscCall(VecDuplicate(tao->solution, &cg->work));
  if (!cg->sk) PetscCall(VecDuplicate(tao->solution, &cg->sk));
  if (!cg->yk) PetscCall(VecDuplicate(tao->gradient, &cg->yk));
  if (!cg->X_old) PetscCall(VecDuplicate(tao->solution, &cg->X_old));
  if (!cg->G_old) PetscCall(VecDuplicate(tao->gradient, &cg->G_old));
  if (cg->diag_scaling) {
    PetscCall(VecDuplicate(tao->solution, &cg->d_work));
    PetscCall(VecDuplicate(tao->solution, &cg->y_work));
    PetscCall(VecDuplicate(tao->solution, &cg->g_work));
  }
  if (!cg->unprojected_gradient) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient));
  if (!cg->unprojected_gradient_old) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient_old));
  PetscCall(MatLMVMAllocate(cg->B, cg->sk, cg->yk));
  if (cg->pc) PetscCall(MatLMVMSetJ0(cg->B, cg->pc));
  PetscFunctionReturn(0);
}

static PetscErrorCode TaoDestroy_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  if (tao->setupcalled) {
    PetscCall(VecDestroy(&cg->W));
    PetscCall(VecDestroy(&cg->work));
    PetscCall(VecDestroy(&cg->X_old));
    PetscCall(VecDestroy(&cg->G_old));
    PetscCall(VecDestroy(&cg->unprojected_gradient));
    PetscCall(VecDestroy(&cg->unprojected_gradient_old));
    PetscCall(VecDestroy(&cg->g_work));
    PetscCall(VecDestroy(&cg->d_work));
    PetscCall(VecDestroy(&cg->y_work));
    PetscCall(VecDestroy(&cg->sk));
    PetscCall(VecDestroy(&cg->yk));
  }
  PetscCall(ISDestroy(&cg->active_lower));
  PetscCall(ISDestroy(&cg->active_upper));
  PetscCall(ISDestroy(&cg->active_fixed));
  PetscCall(ISDestroy(&cg->active_idx));
  PetscCall(ISDestroy(&cg->inactive_idx));
  PetscCall(ISDestroy(&cg->inactive_old));
  PetscCall(ISDestroy(&cg->new_inactives));
  PetscCall(MatDestroy(&cg->B));
  if (cg->pc) PetscCall(MatDestroy(&cg->pc));
  PetscCall(PetscFree(tao->data));
  PetscFunctionReturn(0);
}

static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject, Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscCall(PetscOptionsHead(PetscOptionsObject, "Nonlinear Conjugate Gradient method for bound constrained optimization"));
  PetscCall(PetscOptionsEList("-tao_bncg_type", "cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type, NULL));
  if (cg->cg_type != CG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */
  if (CG_GradientDescent == cg->cg_type) {
    cg->cg_type = CG_PCGradientDescent;
    /* Set scaling equal to none or, at best, scalar scaling. */
    cg->unscaled_restart = PETSC_TRUE;
    cg->diag_scaling     = PETSC_FALSE;
  }
  PetscCall(PetscOptionsEList("-tao_bncg_as_type", "active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->as_type], &cg->as_type, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_hz_eta", "(developer) cutoff tolerance for HZ", "", cg->hz_eta, &cg->hz_eta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_eps", "(developer) cutoff value for restarts", "", cg->epsilon, &cg->epsilon, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_dk_eta", "(developer) cutoff tolerance for DK", "", cg->dk_eta, &cg->dk_eta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_xi", "(developer) Parameter in the KD method", "", cg->xi, &cg->xi, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_alpha", "(developer) parameter for the scalar scaling", "", cg->alpha, &cg->alpha, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for dfp/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_diag_scaling", "Enable diagonal Broyden-like preconditioning", "", cg->diag_scaling, &cg->diag_scaling, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_dynamic_restart", "(developer) use dynamic restarts as in HZ, DK, KD", "", cg->use_dynamic_restart, &cg->use_dynamic_restart, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_unscaled_restart", "(developer) use unscaled gradient restarts", "", cg->unscaled_restart, &cg->unscaled_restart, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL));
  PetscCall(PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL));
  PetscCall(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_spaced_restart", "(developer) Enable regular steepest descent restarting every fixed number of iterations", "", cg->spaced_restart, &cg->spaced_restart, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_no_scaling", "Disable all scaling except in restarts", "", cg->no_scaling, &cg->no_scaling, NULL));
  if (cg->no_scaling) {
    cg->diag_scaling = PETSC_FALSE;
    cg->alpha        = -1.0;
  }
  if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling) { /* Some more default options that appear to be good. */
    cg->neg_xi = PETSC_TRUE;
  }
  PetscCall(PetscOptionsBool("-tao_bncg_neg_xi", "(developer) Use negative xi when it might be a smaller descent direction than necessary", "", cg->neg_xi, &cg->neg_xi, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables", "", cg->as_tol, &cg->as_tol, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables", "", cg->as_step, &cg->as_step, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts", "", cg->delta_min, &cg->delta_min, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts", "", cg->delta_max, &cg->delta_max, NULL));

  PetscCall(PetscOptionsTail());
  PetscCall(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix));
  PetscCall(MatAppendOptionsPrefix(cg->B, "tao_bncg_"));
  PetscCall(MatSetFromOptions(cg->B));
  PetscFunctionReturn(0);
}

static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
{
  PetscBool isascii;
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii) {
    PetscCall(PetscViewerASCIIPushTab(viewer));
    PetscCall(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %i\n", cg->skipped_updates));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %i\n", cg->resets));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %i\n", cg->pure_gd_steps));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %i\n", cg->descent_error));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails));
    if (cg->diag_scaling) {
      PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
      PetscCall(MatView(cg->B, viewer));
      PetscCall(PetscViewerPopFormat(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  PetscFunctionReturn(0);
}
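/* Solves for the scalar scaling tau in the one-parameter family

     alpha*(y^T y)*tau^2 - (2*alpha - 1)*(y^T s)*tau + (alpha - 1)*(s^T s) = 0,

   which interpolates between the BFGS-like scaling tau = (y^T s)/(y^T y) at
   alpha = 1 and the DFP-like scaling tau = (s^T s)/(y^T s) at alpha = 0, while
   alpha = -1 disables scaling (tau = 1). The branches below read off the
   closed-form roots for these special cases instead of running the quadratic
   formula. */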
PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
{
  PetscReal a, b, c, sig1, sig2;

  PetscFunctionBegin;
  *scale = 0.0;
  if (1.0 == alpha) *scale = yts / yty;
  else if (0.0 == alpha) *scale = sts / yts;
  else if (-1.0 == alpha) *scale = 1.0;
  else {
    a = yty;
    b = yts;
    c = sts;
    a *= alpha;
    b *= -(2.0 * alpha - 1.0);
    c *= alpha - 1.0;
    sig1 = (-b + PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a);
    sig2 = (-b - PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a);
    /* accept the positive root as the scalar */
    if (sig1 > 0.0) *scale = sig1;
    else if (sig2 > 0.0) *scale = sig2;
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
  }
  PetscFunctionReturn(0);
}

/*MC
  TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method.

  Options Database Keys:
+ -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled)
. -tao_bncg_eta <r> - restart tolerance
. -tao_bncg_type <taocg_type> - cg formula
. -tao_bncg_as_type <none,bertsekas> - active set estimation method
. -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation
. -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation
. -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision.
. -tao_bncg_theta <r> - convex combination parameter for the Broyden method
. -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
. -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
. -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method
. -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method
. -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method
. -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method
. -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True.
. -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods
. -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts
. -tao_bncg_zeta <r> - Scaling parameter in the KD method
. -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps
. -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps
. -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart
. -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem
. -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations
. -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults.
- -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions

  Notes:
  CG formulas are:
+ "gd" - Gradient Descent
. "fr" - Fletcher-Reeves
. "pr" - Polak-Ribiere-Polyak
. "prp" - Polak-Ribiere-Plus
. "hs" - Hestenes-Stiefel
. "dy" - Dai-Yuan
. "ssml_bfgs" - Self-Scaling Memoryless BFGS
. "ssml_dfp" - Self-Scaling Memoryless DFP
. "ssml_brdn" - Self-Scaling Memoryless Broyden
. "hz" - Hager-Zhang (CG_DESCENT 5.3)
. "dk" - Dai-Kou (2013)
- "kd" - Kou-Dai (2015)
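
  Example usage (a minimal sketch; FormFunctionGradient, the bound vectors xl/xu,
  and the user context are assumed to be supplied by the application):
.vb
  TaoCreate(PETSC_COMM_WORLD, &tao);
  TaoSetType(tao, TAOBNCG);
  TaoSetSolution(tao, x);
  TaoSetVariableBounds(tao, xl, xu);
  TaoSetObjectiveAndGradient(tao, NULL, FormFunctionGradient, &user);
  TaoSetFromOptions(tao); /* e.g. -tao_bncg_type ssml_bfgs */
  TaoSolve(tao);
.ve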
"dk" - Dai-Kou (2013) 372 - "kd" - Kou-Dai (2015) 373 374 Level: beginner 375 M*/ 376 377 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 378 { 379 TAO_BNCG *cg; 380 const char *morethuente_type = TAOLINESEARCHMT; 381 382 PetscFunctionBegin; 383 tao->ops->setup = TaoSetUp_BNCG; 384 tao->ops->solve = TaoSolve_BNCG; 385 tao->ops->view = TaoView_BNCG; 386 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 387 tao->ops->destroy = TaoDestroy_BNCG; 388 389 /* Override default settings (unless already changed) */ 390 if (!tao->max_it_changed) tao->max_it = 2000; 391 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 392 393 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 394 /* method. In particular, gtol should be less that 0.5; the value used in */ 395 /* Nocedal and Wright is 0.10. We use the default values for the */ 396 /* linesearch because it seems to work better. */ 397 PetscCall(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch)); 398 PetscCall(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1)); 399 PetscCall(TaoLineSearchSetType(tao->linesearch, morethuente_type)); 400 PetscCall(TaoLineSearchUseTaoRoutines(tao->linesearch, tao)); 401 402 PetscCall(PetscNewLog(tao,&cg)); 403 tao->data = (void*)cg; 404 PetscCall(KSPInitializePackage()); 405 PetscCall(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B)); 406 PetscCall(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1)); 407 PetscCall(MatSetType(cg->B, MATLMVMDIAGBROYDEN)); 408 409 cg->pc = NULL; 410 411 cg->dk_eta = 0.5; 412 cg->hz_eta = 0.4; 413 cg->dynamic_restart = PETSC_FALSE; 414 cg->unscaled_restart = PETSC_FALSE; 415 cg->no_scaling = PETSC_FALSE; 416 cg->delta_min = 1e-7; 417 cg->delta_max = 100; 418 cg->theta = 1.0; 419 cg->hz_theta = 1.0; 420 cg->dfp_scale = 1.0; 421 cg->bfgs_scale = 1.0; 422 cg->zeta = 0.1; 423 cg->min_quad = 6; 424 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 425 cg->xi = 1.0; 426 cg->neg_xi = PETSC_TRUE; 427 cg->spaced_restart = PETSC_FALSE; 428 cg->tol_quad = 1e-8; 429 cg->as_step = 0.001; 430 cg->as_tol = 0.001; 431 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/ 432 cg->as_type = CG_AS_BERTSEKAS; 433 cg->cg_type = CG_SSML_BFGS; 434 cg->alpha = 1.0; 435 cg->diag_scaling = PETSC_TRUE; 436 PetscFunctionReturn(0); 437 } 438 439 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) 440 { 441 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 442 PetscReal scaling; 443 444 PetscFunctionBegin; 445 ++cg->resets; 446 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 447 scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling)); 448 if (cg->unscaled_restart) { 449 scaling = 1.0; 450 ++cg->pure_gd_steps; 451 } 452 PetscCall(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient)); 453 /* Also want to reset our diagonal scaling with each restart */ 454 if (cg->diag_scaling) { 455 PetscCall(MatLMVMReset(cg->B, PETSC_FALSE)); 456 } 457 PetscFunctionReturn(0); 458 } 459 460 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) 461 { 462 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 463 PetscReal quadinterp; 464 465 PetscFunctionBegin; 466 if (cg->f < cg->min_quad/10) { 467 *dynrestart = PETSC_FALSE; 468 PetscFunctionReturn(0); 469 } /* just skip this since this strategy doesn't work well for functions near zero */ 470 quadinterp = 2.0*(cg->f - 
  quadinterp = 2.0 * (cg->f - fold) / (stepsize * (gd + gd_old));
  if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
  else {
    cg->iter_quad = 0;
    *dynrestart   = PETSC_FALSE;
  }
  if (cg->iter_quad >= cg->min_quad) {
    cg->iter_quad = 0;
    *dynrestart   = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscReal gamma = 1.0, tau_k, beta;
  PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk = 1.0, gd;
  PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
  PetscInt  dim;
  PetscBool cg_restart = PETSC_FALSE;

  PetscFunctionBegin;
  /* Local curvature check to see if we need to restart */
  if (tao->niter >= 1 || tao->recycle) {
    PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
    PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
    ynorm2 = ynorm * ynorm;
    PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk));
    if (step * dnorm < PETSC_MACHINE_EPSILON || step * dk_yk < PETSC_MACHINE_EPSILON) {
      cg_restart = PETSC_TRUE;
      ++cg->skipped_updates;
    }
  }
  /* If the user wants regularly spaced restarts, do a gradient descent step every
     min_restart_num*n iterations, where n is the dimension of the problem */
  if (cg->spaced_restart) {
    PetscCall(VecGetSize(tao->gradient, &dim));
    if (0 == tao->niter % (dim * cg->min_restart_num)) cg_restart = PETSC_TRUE;
  }
  /* Compute the diagonal scaling vector if applicable */
  if (cg->diag_scaling) PetscCall(MatLMVMUpdate(cg->B, tao->solution, tao->gradient));

  /* A note on diagonal scaling (to be added to paper):
     For the FR, PR, PRP, and DY methods, the diagonally scaled versions
     must be derived as a preconditioned CG method rather than as
     a Hessian initialization like in the Broyden methods. */

  /* In that case, one writes the objective function as
     f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k).
     Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to
     HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the
     same under preconditioning. Note that A is diagonal, such that A^T = A. */

  /* This yields questions like what the dot product d_k^T y_k
     should look like. HZ mistakenly treats that as the same under
     preconditioning, but that is not necessarily true. */

  /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation,
     we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}),
     yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is
     NOT the same if our preconditioning matrix is updated between iterations.
     This same issue is found when considering dot products of the form g_{k+1}^T y_k. */
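  /* Each branch below assembles the new direction in the generic form
       d_{k+1} = -tau_k*(D_k)g_{k+1} + beta*d_k (+ gamma*(D_k)y_k for the
     Broyden-type members), where tau_k is the scalar scaling, D_k is the optional
     diagonal (LMVM) scaling applied via MatSolve(), and beta/gamma are the
     formula-specific CG coefficients computed from the dot products above. */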
  /* Compute CG step direction */
  if (cg_restart) {
    PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
  } else if (pcgd_fallback) {
    /* Just like preconditioned CG */
    PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
    PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
  } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
    switch (cg->cg_type) {
    case CG_PCGradientDescent:
      if (!cg->diag_scaling) {
        if (!cg->no_scaling) {
          cg->sts = step * step * dnorm * dnorm;
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha));
        } else {
          tau_k = 1.0;
          ++cg->pure_gd_steps;
        }
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient));
      } else {
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
      }
      break;

    case CG_HestenesStiefel:
      /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
      if (!cg->diag_scaling) {
        cg->sts = step * step * dnorm * dnorm;
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / dk_yk;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
        beta = gkp1_yk / dk_yk;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case CG_FletcherReeves:
      PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
      PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
      PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
      ynorm2 = ynorm * ynorm;
      PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk));
      if (!cg->diag_scaling) {
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, step * step * dnorm * dnorm, &tau_k, cg->alpha));
        beta = tau_k * gnorm2 / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(tao->gradient, cg->g_work, &tmp));
        beta = tmp / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case CG_PolakRibierePolyak:
      snorm = step * dnorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old));
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        beta = gkp1_yk / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;
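    /* PR+ differs from PRP only in the PetscMax(beta, 0.0) truncation below, the
       standard safeguard that restores global convergence guarantees for
       Polak-Ribiere-type methods (cf. Gilbert & Nocedal, SIAM J. Optim. 1992). */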
    case CG_PolakRibierePlus:
      PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
      PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
      ynorm2 = ynorm * ynorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / gnorm2_old;
        beta = PetscMax(beta, 0.0);
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        beta = gkp1_yk / gnorm2_old;
        beta = PetscMax(beta, 0.0);
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case CG_DaiYuan:
      /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
         SIAM Journal on Optimization 10, no. 1 (1999): 177-182. */
      if (!cg->diag_scaling) {
        PetscCall(VecDot(tao->stepdirection, tao->gradient, &gd));
        PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->yts, &tau_k, cg->alpha));
        beta = tau_k * gnorm2 / (gd - gd_old);
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(MatMult(cg->B, tao->stepdirection, cg->d_work));
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, tao->gradient, &gtDg));
        PetscCall(VecDot(tao->stepdirection, cg->G_old, &gd_old));
        PetscCall(VecDot(cg->d_work, cg->g_work, &dk_yk));
        dk_yk = dk_yk - gd_old;
        beta  = gtDg / dk_yk;
        PetscCall(VecScale(cg->d_work, beta));
        PetscCall(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work));
      }
      break;

    case CG_HagerZhang:
      /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
         ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
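      /* In the unscaled branch this computes
           beta = tau_k*((g_{k+1}^T y_k)/(d_k^T y_k) - ||y_k||^2 (g_{k+1}^T d_k)/(d_k^T y_k)^2),
         then clips beta from below by the eta-scaled terms: the "guaranteed descent"
         truncation of CG_DESCENT combined with the third comparison from DK 2013. */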
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = dnorm * step;
      cg->yts = step * dk_yk;
      if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
      if (cg->dynamic_restart) {
        PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      } else {
        if (!cg->diag_scaling) {
          PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
          /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */
          tmp  = gd / dk_yk;
          beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk));
          /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
          beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm));
          /* d <- -t*g + beta*t*d */
          PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm * snorm;
          /* Apply the diagonal scaling to all my vectors */
          PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
          PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
          PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work));
          /* Construct the constant ytDgkp1 */
          PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
          /* Construct the constant for scaling Dkyk in the update */
          tmp = gd / dk_yk;
          PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
          tau_k = -tau_k * gd / (dk_yk * dk_yk);
          /* beta is the constant which adds the dk contribution */
          beta = gkp1_yk / dk_yk + cg->hz_theta * tau_k; /* HZ; (1.15) from DK 2013 */
          /* From HZ2013, modified to account for diagonal scaling */
          PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old));
          PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd));
          beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm));
          /* Do the update */
          PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
        }
      }
      break;

    case CG_DaiKou:
      /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search."
         SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
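      /* Relative to HZ, the DK beta below adds g_{k+1}^T d_k/||d_k||^2 inside the
         tau_k-scaled term and subtracts step*(g_{k+1}^T d_k)/(d_k^T y_k), terms which
         DK 2013 obtain from a memoryless quasi-Newton interpretation of CG. */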
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
        /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
        tmp  = gd / dk_yk;
        beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk) + gd / (dnorm * dnorm)) - step * gd / dk_yk;
        beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm));
        /* d <- -t*g + beta*t*d */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        cg->yty = ynorm2;
        cg->sts = snorm * snorm;
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work));
        /* Construct the constant ytDgkp1 */
        PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
        PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
        tau_k = tau_k * gd / (dk_yk * dk_yk);
        tmp   = gd / dk_yk;
        /* beta is the constant which adds the dk contribution */
        beta = gkp1_yk / dk_yk - step * tmp - tau_k;
        /* Update this for the last term in beta */
        PetscCall(VecDot(cg->y_work, tao->stepdirection, &dk_yk));
        beta += tmp * dk_yk / (dnorm * dnorm); /* projection of y_work onto dk */
        PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd));
        PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old));
        beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm));
        /* Do the update */
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case CG_KouDai:
      /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization."
         Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
      if (cg->dynamic_restart) {
        PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      } else {
        if (!cg->diag_scaling) {
          PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
          beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk)) - step * gd / dk_yk;
          if (beta < cg->zeta * tau_k * gd / (dnorm * dnorm)) { /* 0.1 is KD's zeta parameter */
            beta  = cg->zeta * tau_k * gd / (dnorm * dnorm);
            gamma = 0.0;
          } else {
            if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0 * gd / dk_yk;
            /* This seems to be very effective when there's no tau_k scaling.
               This guarantees a large descent step every iteration, going through
               DK 2015 Lemma 3.1's proof but allowing for negative xi */
            else gamma = cg->xi * gd / dk_yk;
          }
          /* d <- -t*g + beta*t*d + t*tmp*yk */
          PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma * tau_k, beta, tao->gradient, cg->yk));
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm * snorm;
          PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
          PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
          /* Construct the constant ytDgkp1 */
          PetscCall(VecDot(cg->yk, cg->g_work, &gkp1D_yk));
          /* Construct the constant for scaling Dkyk in the update */
          gamma = gd / dk_yk;
          /* tau_k = -ytDy/(ytd)^2 * gd */
          PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
          tau_k = tau_k * gd / (dk_yk * dk_yk);
          /* beta is the constant which adds the d_k contribution */
          beta = gkp1D_yk / dk_yk - step * gamma - tau_k;
          /* Here is the requisite check */
          PetscCall(VecDot(tao->stepdirection, cg->g_work, &tmp));
          if (cg->neg_xi) {
            /* modified KD implementation */
            if (gkp1D_yk / dk_yk < 0) gamma = -1.0 * gd / dk_yk;
            else gamma = cg->xi * gd / dk_yk;
            if (beta < cg->zeta * tmp / (dnorm * dnorm)) {
              beta  = cg->zeta * tmp / (dnorm * dnorm);
              gamma = 0.0;
            }
          } else { /* original KD 2015 implementation */
            if (beta < cg->zeta * tmp / (dnorm * dnorm)) {
              beta  = cg->zeta * tmp / (dnorm * dnorm);
              gamma = 0.0;
            } else {
              gamma = cg->xi * gd / dk_yk;
            }
          }
          /* Do the update in two steps */
          PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
          PetscCall(VecAXPY(tao->stepdirection, gamma, cg->y_work));
        }
      }
      break;

    case CG_SSML_BFGS:
      /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory."
         Discussion Papers 269 (1977). */
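      /* Perry's observation: the self-scaled, memoryless BFGS direction
         -H_{k+1} g_{k+1}, with H_k = tau_k*I, can be written purely in CG-style
         vector operations,
           d_{k+1} = -tau_k*g_{k+1} + beta*d_k + tau_k*(g_{k+1}^T d_k/(d_k^T y_k))*y_k,
         which is exactly what the VecAXPBYPCZ() call in the unscaled branch below
         evaluates. */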
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
        tmp  = gd / dk_yk;
        beta = tau_k * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * tmp;
        /* d <- -t*g + beta*t*d + t*tmp*yk */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp * tau_k, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* compute scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = gd / dk_yk;
        /* Compute scalar beta */
        beta = (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    case CG_SSML_DFP:
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        tau_k = cg->dfp_scale * tau_k;
        tmp   = tau_k * gkp1_yk / cg->yty;
        beta  = -step * gd / dk_yk;
        /* d <- -t*g + beta*d + tmp*yk */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* compute scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = gkp1_yk / tmp;
        /* Compute scalar beta */
        beta = -step * gd / dk_yk;
        /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    case CG_SSML_BROYDEN:
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = step * dk_yk;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_bfgs, cg->bfgs_scale));
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_dfp, cg->dfp_scale));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        tau_k = cg->theta * tau_bfgs + (1.0 - cg->theta) * tau_dfp;
        /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0,
           it should reproduce the tau_dfp scaling. Same with dfp_scale. */
        tmp  = cg->theta * tau_bfgs * gd / dk_yk + (1 - cg->theta) * tau_dfp * gkp1_yk / cg->yty;
        beta = cg->theta * tau_bfgs * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* d <- -t*g + beta*d + tmp*yk */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* compute scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = cg->theta * gd / dk_yk + (1 - cg->theta) * (gkp1_yk / tmp);
        /* Compute scalar beta */
        beta = cg->theta * (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    default:
      break;
    }
  }
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm)
{
  TAO_BNCG                    *cg        = (TAO_BNCG *)tao->data;
  TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING;
  PetscReal                    step = 1.0, gnorm2, gd, dnorm = 0.0;
  PetscReal                    gnorm2_old, f_old, resnorm, gnorm_old;
  PetscBool                    pcgd_fallback = PETSC_FALSE;

  PetscFunctionBegin;
  /* We are now going to perform a line search along the direction. */
  /* Store solution and gradient info before it changes */
  PetscCall(VecCopy(tao->solution, cg->X_old));
  PetscCall(VecCopy(tao->gradient, cg->G_old));
  PetscCall(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old));

  gnorm_old  = gnorm;
  gnorm2_old = gnorm_old * gnorm_old;
  f_old      = cg->f;
  /* Perform bounded line search. If we are recycling a solution from a previous */
  /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */
  if (!(tao->recycle && 0 == tao->niter)) {
    /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */
    PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
    PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
    PetscCall(TaoAddLineSearchCounts(tao));

    /* Check linesearch failure */
    if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
      ++cg->ls_fails;
      if (cg->cg_type == CG_GradientDescent) {
        /* Nothing left to do but fail out of the optimization */
        step        = 0.0;
        tao->reason = TAO_DIVERGED_LS_FAILURE;
      } else {
        /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */
        PetscCall(VecCopy(cg->X_old, tao->solution));
        PetscCall(VecCopy(cg->G_old, tao->gradient));
        PetscCall(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient));
        gnorm  = gnorm_old;
        gnorm2 = gnorm2_old;
        cg->f  = f_old;

        /* Fall back on preconditioned CG (so long as you're not already using it) */
        if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling) {
          pcgd_fallback = PETSC_TRUE;
          PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));

          PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
          PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));

          PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
          PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
          PetscCall(TaoAddLineSearchCounts(tao));

          pcgd_fallback = PETSC_FALSE;
          if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
            /* Going to perform a regular gradient descent step. */
            ++cg->ls_fails;
            step = 0.0;
          }
        }
        /* Fall back on the scaled gradient step */
        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
          ++cg->ls_fails;
          PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
          PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
          PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
          PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
          PetscCall(TaoAddLineSearchCounts(tao));
        }

        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
          /* Nothing left to do but fail out of the optimization */
          ++cg->ls_fails;
          step        = 0.0;
          tao->reason = TAO_DIVERGED_LS_FAILURE;
        } else {
          /* One of the fallbacks worked. Set them both back equal to false. */
          pcgd_fallback = PETSC_FALSE;
        }
      }
    }
    /* Convergence test for line search failure */
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);

    /* Standard convergence test */
    PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
    PetscCall(VecNorm(cg->W, NORM_2, &resnorm));
    PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
    PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
    PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
    PetscCall((*tao->ops->convergencetest)(tao, tao->cnvP));
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  }
  /* Assert we have an updated step and we need at least one more iteration. */
  /* Calculate the next direction */
  /* Estimate the active set at the new solution */
  PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type));
  /* Compute the projected gradient and its norm */
  PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient));
  PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0));
  PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
  gnorm2 = gnorm * gnorm;

  /* Calculate some quantities used in the StepDirectionUpdate. */
  PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm));
  /* Update the step direction. */
  PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));
  ++tao->niter;
  PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));

  if (cg->cg_type != CG_GradientDescent) {
    /* Figure out which previously active variables became inactive this iteration */
    PetscCall(ISDestroy(&cg->new_inactives));
    if (cg->inactive_idx && cg->inactive_old) PetscCall(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives));
    /* Selectively reset the CG step for those freshly inactive variables */
    if (cg->new_inactives) {
      PetscCall(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
      PetscCall(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
      PetscCall(VecCopy(cg->inactive_grad, cg->inactive_step));
      PetscCall(VecScale(cg->inactive_step, -1.0));
      PetscCall(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
      PetscCall(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
    }
    /* Verify that this is a descent direction */
    PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
    PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm));
    if (PetscIsInfOrNanReal(gd) || (gd / (dnorm * dnorm) <= -1e10 || gd / (dnorm * dnorm) >= -1e-10)) {
      /* Not a descent direction, so we reset back to projected gradient descent */
      PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
      ++cg->descent_error;
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscCall(PetscObjectReference((PetscObject)H0));
  cg->pc = H0;
  PetscFunctionReturn(0);
}
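/* Example (illustrative sketch): seeding the diagonal LMVM matrix with a user
   matrix before TaoSolve(); the call takes its own reference, so the caller may
   destroy its handle afterwards.

     Mat H0;                           // user-assembled initial Hessian approximation
     PetscCall(TaoBNCGSetH0(tao, H0));
     PetscCall(MatDestroy(&H0));       // safe: TaoBNCGSetH0() holds a reference

   The reference is released in TaoDestroy_BNCG(). */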