1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/ 3 #include <petscksp.h> 4 5 #define CG_GradientDescent 0 6 #define CG_HestenesStiefel 1 7 #define CG_FletcherReeves 2 8 #define CG_PolakRibierePolyak 3 9 #define CG_PolakRibierePlus 4 10 #define CG_DaiYuan 5 11 #define CG_HagerZhang 6 12 #define CG_DaiKou 7 13 #define CG_KouDai 8 14 #define CG_SSML_BFGS 9 15 #define CG_SSML_DFP 10 16 #define CG_SSML_BROYDEN 11 17 #define CG_PCGradientDescent 12 18 #define CGTypes 13 19 20 static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"}; 21 22 #define CG_AS_NONE 0 23 #define CG_AS_BERTSEKAS 1 24 #define CG_AS_SIZE 2 25 26 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 27 28 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) 29 { 30 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 31 32 PetscFunctionBegin; 33 PetscCall(ISDestroy(&cg->inactive_old)); 34 if (cg->inactive_idx) { 35 PetscCall(ISDuplicate(cg->inactive_idx, &cg->inactive_old)); 36 PetscCall(ISCopy(cg->inactive_idx, cg->inactive_old)); 37 } 38 switch (asType) { 39 case CG_AS_NONE: 40 PetscCall(ISDestroy(&cg->inactive_idx)); 41 PetscCall(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx)); 42 PetscCall(ISDestroy(&cg->active_idx)); 43 PetscCall(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx)); 44 break; 45 46 case CG_AS_BERTSEKAS: 47 /* Use gradient descent to estimate the active set */ 48 PetscCall(VecCopy(cg->unprojected_gradient, cg->W)); 49 PetscCall(VecScale(cg->W, -1.0)); 50 PetscCall(TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, 51 &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx)); 52 break; 53 default: 54 break; 55 } 56 PetscFunctionReturn(0); 57 } 58 59 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step) 60 { 61 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 62 63 PetscFunctionBegin; 64 switch (asType) { 65 case CG_AS_NONE: 66 PetscCall(VecISSet(step, cg->active_idx, 0.0)); 67 break; 68 69 case CG_AS_BERTSEKAS: 70 PetscCall(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step)); 71 break; 72 73 default: 74 break; 75 } 76 PetscFunctionReturn(0); 77 } 78 79 static PetscErrorCode TaoSolve_BNCG(Tao tao) 80 { 81 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 82 PetscReal step=1.0,gnorm,gnorm2, resnorm; 83 PetscInt nDiff; 84 85 PetscFunctionBegin; 86 /* Project the current point onto the feasible set */ 87 PetscCall(TaoComputeVariableBounds(tao)); 88 PetscCall(TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU)); 89 90 /* Project the initial point onto the feasible region */ 91 PetscCall(TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution)); 92 93 if (nDiff > 0 || !tao->recycle) { 94 PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); 95 } 96 PetscCall(VecNorm(cg->unprojected_gradient,NORM_2,&gnorm)); 97 PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 98 99 /* Estimate the active set and compute the projected gradient */ 100 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 101 102 /* Project the gradient and calculate the norm */ 103 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 104 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 105 PetscCall(VecNorm(tao->gradient,NORM_2,&gnorm)); 106 gnorm2 = gnorm*gnorm; 107 108 /* Initialize counters */ 109 tao->niter = 0; 110 cg->ls_fails = cg->descent_error = 0; 111 cg->resets = -1; 112 cg->skipped_updates = cg->pure_gd_steps = 0; 113 cg->iter_quad = 0; 114 115 /* Convergence test at the starting point. */ 116 tao->reason = TAO_CONTINUE_ITERATING; 117 118 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 119 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 120 PetscCheck(!PetscIsInfOrNanReal(resnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 121 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 122 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 123 PetscCall((*tao->ops->convergencetest)(tao,tao->cnvP)); 124 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 125 /* Calculate initial direction. */ 126 if (!tao->recycle) { 127 /* We are not recycling a solution/history from a past TaoSolve */ 128 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 129 } 130 /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */ 131 while (1) { 132 /* Call general purpose update function */ 133 if (tao->ops->update) { 134 PetscCall((*tao->ops->update)(tao, tao->niter, tao->user_update)); 135 PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); 136 } 137 PetscCall(TaoBNCGConductIteration(tao, gnorm)); 138 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 139 } 140 } 141 142 static PetscErrorCode TaoSetUp_BNCG(Tao tao) 143 { 144 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 145 146 PetscFunctionBegin; 147 if (!tao->gradient) { 148 PetscCall(VecDuplicate(tao->solution,&tao->gradient)); 149 } 150 if (!tao->stepdirection) { 151 PetscCall(VecDuplicate(tao->solution,&tao->stepdirection)); 152 } 153 if (!cg->W) { 154 PetscCall(VecDuplicate(tao->solution,&cg->W)); 155 } 156 if (!cg->work) { 157 PetscCall(VecDuplicate(tao->solution,&cg->work)); 158 } 159 if (!cg->sk) { 160 PetscCall(VecDuplicate(tao->solution,&cg->sk)); 161 } 162 if (!cg->yk) { 163 PetscCall(VecDuplicate(tao->gradient,&cg->yk)); 164 } 165 if (!cg->X_old) { 166 PetscCall(VecDuplicate(tao->solution,&cg->X_old)); 167 } 168 if (!cg->G_old) { 169 PetscCall(VecDuplicate(tao->gradient,&cg->G_old)); 170 } 171 if (cg->diag_scaling) { 172 PetscCall(VecDuplicate(tao->solution,&cg->d_work)); 173 PetscCall(VecDuplicate(tao->solution,&cg->y_work)); 174 PetscCall(VecDuplicate(tao->solution,&cg->g_work)); 175 } 176 if (!cg->unprojected_gradient) { 177 PetscCall(VecDuplicate(tao->gradient,&cg->unprojected_gradient)); 178 } 179 if (!cg->unprojected_gradient_old) { 180 PetscCall(VecDuplicate(tao->gradient,&cg->unprojected_gradient_old)); 181 } 182 PetscCall(MatLMVMAllocate(cg->B, cg->sk, cg->yk)); 183 if (cg->pc) PetscCall(MatLMVMSetJ0(cg->B, cg->pc)); 184 PetscFunctionReturn(0); 185 } 186 187 static PetscErrorCode TaoDestroy_BNCG(Tao tao) 188 { 189 TAO_BNCG *cg = (TAO_BNCG*) tao->data; 190 191 PetscFunctionBegin; 192 if (tao->setupcalled) { 193 PetscCall(VecDestroy(&cg->W)); 194 PetscCall(VecDestroy(&cg->work)); 195 PetscCall(VecDestroy(&cg->X_old)); 196 PetscCall(VecDestroy(&cg->G_old)); 197 PetscCall(VecDestroy(&cg->unprojected_gradient)); 198 PetscCall(VecDestroy(&cg->unprojected_gradient_old)); 199 PetscCall(VecDestroy(&cg->g_work)); 200 PetscCall(VecDestroy(&cg->d_work)); 201 PetscCall(VecDestroy(&cg->y_work)); 202 PetscCall(VecDestroy(&cg->sk)); 203 PetscCall(VecDestroy(&cg->yk)); 204 } 205 PetscCall(ISDestroy(&cg->active_lower)); 206 PetscCall(ISDestroy(&cg->active_upper)); 207 PetscCall(ISDestroy(&cg->active_fixed)); 208 PetscCall(ISDestroy(&cg->active_idx)); 209 PetscCall(ISDestroy(&cg->inactive_idx)); 210 PetscCall(ISDestroy(&cg->inactive_old)); 211 PetscCall(ISDestroy(&cg->new_inactives)); 212 PetscCall(MatDestroy(&cg->B)); 213 if (cg->pc) { 214 PetscCall(MatDestroy(&cg->pc)); 215 } 216 PetscCall(PetscFree(tao->data)); 217 PetscFunctionReturn(0); 218 } 219 220 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao) 221 { 222 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 223 224 PetscFunctionBegin; 225 PetscOptionsHeadBegin(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization"); 226 PetscCall(PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL)); 227 if (cg->cg_type != CG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */ 228 if (CG_GradientDescent == cg->cg_type) { 229 cg->cg_type = CG_PCGradientDescent; 230 /* Set scaling equal to none or, at best, scalar scaling. */ 231 cg->unscaled_restart = PETSC_TRUE; 232 cg->diag_scaling = PETSC_FALSE; 233 } 234 PetscCall(PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL)); 235 PetscCall(PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL)); 236 PetscCall(PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL)); 237 PetscCall(PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL)); 238 PetscCall(PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL)); 239 PetscCall(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL)); 240 PetscCall(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL)); 241 PetscCall(PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL)); 242 PetscCall(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL)); 243 PetscCall(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL)); 244 PetscCall(PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL)); 245 PetscCall(PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL)); 246 PetscCall(PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL)); 247 PetscCall(PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL)); 248 PetscCall(PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL)); 249 PetscCall(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL)); 250 PetscCall(PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL)); 251 PetscCall(PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL)); 252 if (cg->no_scaling) { 253 cg->diag_scaling = PETSC_FALSE; 254 cg->alpha = -1.0; 255 } 256 if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling) { /* Some more default options that appear to be good. */ 257 cg->neg_xi = PETSC_TRUE; 258 } 259 PetscCall(PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL)); 260 PetscCall(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL)); 261 PetscCall(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL)); 262 PetscCall(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL)); 263 PetscCall(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL)); 264 265 PetscOptionsHeadEnd(); 266 PetscCall(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix)); 267 PetscCall(MatAppendOptionsPrefix(cg->B, "tao_bncg_")); 268 PetscCall(MatSetFromOptions(cg->B)); 269 PetscFunctionReturn(0); 270 } 271 272 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) 273 { 274 PetscBool isascii; 275 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 276 277 PetscFunctionBegin; 278 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 279 if (isascii) { 280 PetscCall(PetscViewerASCIIPushTab(viewer)); 281 PetscCall(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type])); 282 PetscCall(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %" PetscInt_FMT "\n", cg->skipped_updates)); 283 PetscCall(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %" PetscInt_FMT "\n", cg->resets)); 284 PetscCall(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %" PetscInt_FMT "\n", cg->pure_gd_steps)); 285 PetscCall(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %" PetscInt_FMT "\n", cg->descent_error)); 286 PetscCall(PetscViewerASCIIPrintf(viewer, "Line search fails: %" PetscInt_FMT "\n", cg->ls_fails)); 287 if (cg->diag_scaling) { 288 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); 289 if (isascii) { 290 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 291 PetscCall(MatView(cg->B, viewer)); 292 PetscCall(PetscViewerPopFormat(viewer)); 293 } 294 } 295 PetscCall(PetscViewerASCIIPopTab(viewer)); 296 } 297 PetscFunctionReturn(0); 298 } 299 300 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha) 301 { 302 PetscReal a, b, c, sig1, sig2; 303 304 PetscFunctionBegin; 305 *scale = 0.0; 306 if (1.0 == alpha) *scale = yts/yty; 307 else if (0.0 == alpha) *scale = sts/yts; 308 else if (-1.0 == alpha) *scale = 1.0; 309 else { 310 a = yty; 311 b = yts; 312 c = sts; 313 a *= alpha; 314 b *= -(2.0*alpha - 1.0); 315 c *= alpha - 1.0; 316 sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 317 sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 318 /* accept the positive root as the scalar */ 319 if (sig1 > 0.0) *scale = sig1; 320 else if (sig2 > 0.0) *scale = sig2; 321 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar"); 322 } 323 PetscFunctionReturn(0); 324 } 325 326 /*MC 327 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 328 329 Options Database Keys: 330 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled) 331 . -tao_bncg_eta <r> - restart tolerance 332 . -tao_bncg_type <taocg_type> - cg formula 333 . -tao_bncg_as_type <none,bertsekas> - active set estimation method 334 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation 335 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation 336 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision. 337 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method 338 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 339 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 340 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method 341 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method 342 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method 343 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method 344 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True. 345 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods 346 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts 347 . -tao_bncg_zeta <r> - Scaling parameter in the KD method 348 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps 349 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps 350 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart 351 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem 352 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations 353 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults. 354 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions 355 356 Notes: 357 CG formulas are: 358 + "gd" - Gradient Descent 359 . "fr" - Fletcher-Reeves 360 . "pr" - Polak-Ribiere-Polyak 361 . "prp" - Polak-Ribiere-Plus 362 . "hs" - Hestenes-Steifel 363 . "dy" - Dai-Yuan 364 . "ssml_bfgs" - Self-Scaling Memoryless BFGS 365 . "ssml_dfp" - Self-Scaling Memoryless DFP 366 . "ssml_brdn" - Self-Scaling Memoryless Broyden 367 . "hz" - Hager-Zhang (CG_DESCENT 5.3) 368 . "dk" - Dai-Kou (2013) 369 - "kd" - Kou-Dai (2015) 370 371 Level: beginner 372 M*/ 373 374 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 375 { 376 TAO_BNCG *cg; 377 const char *morethuente_type = TAOLINESEARCHMT; 378 379 PetscFunctionBegin; 380 tao->ops->setup = TaoSetUp_BNCG; 381 tao->ops->solve = TaoSolve_BNCG; 382 tao->ops->view = TaoView_BNCG; 383 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 384 tao->ops->destroy = TaoDestroy_BNCG; 385 386 /* Override default settings (unless already changed) */ 387 if (!tao->max_it_changed) tao->max_it = 2000; 388 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 389 390 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 391 /* method. In particular, gtol should be less that 0.5; the value used in */ 392 /* Nocedal and Wright is 0.10. We use the default values for the */ 393 /* linesearch because it seems to work better. */ 394 PetscCall(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch)); 395 PetscCall(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1)); 396 PetscCall(TaoLineSearchSetType(tao->linesearch, morethuente_type)); 397 PetscCall(TaoLineSearchUseTaoRoutines(tao->linesearch, tao)); 398 399 PetscCall(PetscNewLog(tao,&cg)); 400 tao->data = (void*)cg; 401 PetscCall(KSPInitializePackage()); 402 PetscCall(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B)); 403 PetscCall(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1)); 404 PetscCall(MatSetType(cg->B, MATLMVMDIAGBROYDEN)); 405 406 cg->pc = NULL; 407 408 cg->dk_eta = 0.5; 409 cg->hz_eta = 0.4; 410 cg->dynamic_restart = PETSC_FALSE; 411 cg->unscaled_restart = PETSC_FALSE; 412 cg->no_scaling = PETSC_FALSE; 413 cg->delta_min = 1e-7; 414 cg->delta_max = 100; 415 cg->theta = 1.0; 416 cg->hz_theta = 1.0; 417 cg->dfp_scale = 1.0; 418 cg->bfgs_scale = 1.0; 419 cg->zeta = 0.1; 420 cg->min_quad = 6; 421 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 422 cg->xi = 1.0; 423 cg->neg_xi = PETSC_TRUE; 424 cg->spaced_restart = PETSC_FALSE; 425 cg->tol_quad = 1e-8; 426 cg->as_step = 0.001; 427 cg->as_tol = 0.001; 428 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/ 429 cg->as_type = CG_AS_BERTSEKAS; 430 cg->cg_type = CG_SSML_BFGS; 431 cg->alpha = 1.0; 432 cg->diag_scaling = PETSC_TRUE; 433 PetscFunctionReturn(0); 434 } 435 436 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) 437 { 438 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 439 PetscReal scaling; 440 441 PetscFunctionBegin; 442 ++cg->resets; 443 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 444 scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling)); 445 if (cg->unscaled_restart) { 446 scaling = 1.0; 447 ++cg->pure_gd_steps; 448 } 449 PetscCall(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient)); 450 /* Also want to reset our diagonal scaling with each restart */ 451 if (cg->diag_scaling) PetscCall(MatLMVMReset(cg->B, PETSC_FALSE)); 452 PetscFunctionReturn(0); 453 } 454 455 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) 456 { 457 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 458 PetscReal quadinterp; 459 460 PetscFunctionBegin; 461 if (cg->f < cg->min_quad/10) { 462 *dynrestart = PETSC_FALSE; 463 PetscFunctionReturn(0); 464 } /* just skip this since this strategy doesn't work well for functions near zero */ 465 quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old)); 466 if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad; 467 else { 468 cg->iter_quad = 0; 469 *dynrestart = PETSC_FALSE; 470 } 471 if (cg->iter_quad >= cg->min_quad) { 472 cg->iter_quad = 0; 473 *dynrestart = PETSC_TRUE; 474 } 475 PetscFunctionReturn(0); 476 } 477 478 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback) 479 { 480 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 481 PetscReal gamma = 1.0, tau_k, beta; 482 PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk=1.0, gd; 483 PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg; 484 PetscInt dim; 485 PetscBool cg_restart = PETSC_FALSE; 486 PetscFunctionBegin; 487 488 /* Local curvature check to see if we need to restart */ 489 if (tao->niter >= 1 || tao->recycle) { 490 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 491 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 492 ynorm2 = ynorm*ynorm; 493 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 494 if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON) { 495 cg_restart = PETSC_TRUE; 496 ++cg->skipped_updates; 497 } 498 if (cg->spaced_restart) { 499 PetscCall(VecGetSize(tao->gradient, &dim)); 500 if (tao->niter % (dim*cg->min_restart_num)) cg_restart = PETSC_TRUE; 501 } 502 } 503 /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */ 504 if (cg->spaced_restart) { 505 PetscCall(VecGetSize(tao->gradient, &dim)); 506 if (0 == tao->niter % (6*dim)) cg_restart = PETSC_TRUE; 507 } 508 /* Compute the diagonal scaling vector if applicable */ 509 if (cg->diag_scaling) PetscCall(MatLMVMUpdate(cg->B, tao->solution, tao->gradient)); 510 511 /* A note on diagonal scaling (to be added to paper): 512 For the FR, PR, PRP, and DY methods, the diagonally scaled versions 513 must be derived as a preconditioned CG method rather than as 514 a Hessian initialization like in the Broyden methods. */ 515 516 /* In that case, one writes the objective function as 517 f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k). 518 Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to 519 HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the 520 same under preconditioning. Note that A is diagonal, such that A^T = A. */ 521 522 /* This yields questions like what the dot product d_k^T y_k 523 should look like. HZ mistakenly treats that as the same under 524 preconditioning, but that is not necessarily true. */ 525 526 /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation, 527 we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}), 528 yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is 529 NOT the same if our preconditioning matrix is updated between iterations. 530 This same issue is found when considering dot products of the form g_{k+1}^T y_k. */ 531 532 /* Compute CG step direction */ 533 if (cg_restart) { 534 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 535 } else if (pcgd_fallback) { 536 /* Just like preconditioned CG */ 537 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 538 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 539 } else if (ynorm2 > PETSC_MACHINE_EPSILON) { 540 switch (cg->cg_type) { 541 case CG_PCGradientDescent: 542 if (!cg->diag_scaling) { 543 if (!cg->no_scaling) { 544 cg->sts = step*step*dnorm*dnorm; 545 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha)); 546 } else { 547 tau_k = 1.0; 548 ++cg->pure_gd_steps; 549 } 550 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient)); 551 } else { 552 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 553 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 554 } 555 break; 556 557 case CG_HestenesStiefel: 558 /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */ 559 if (!cg->diag_scaling) { 560 cg->sts = step*step*dnorm*dnorm; 561 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 562 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha)); 563 beta = tau_k*gkp1_yk/dk_yk; 564 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 565 } else { 566 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 567 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 568 beta = gkp1_yk/dk_yk; 569 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 570 } 571 break; 572 573 case CG_FletcherReeves: 574 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 575 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 576 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 577 ynorm2 = ynorm*ynorm; 578 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 579 if (!cg->diag_scaling) { 580 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha)); 581 beta = tau_k*gnorm2/gnorm2_old; 582 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 583 } else { 584 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */ 585 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 586 PetscCall(VecDot(tao->gradient, cg->g_work, &tmp)); 587 beta = tmp/gnorm2_old; 588 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 589 } 590 break; 591 592 case CG_PolakRibierePolyak: 593 snorm = step*dnorm; 594 if (!cg->diag_scaling) { 595 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 596 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 597 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha)); 598 beta = tau_k*gkp1_yk/gnorm2_old; 599 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 600 } else { 601 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); 602 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 603 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 604 beta = gkp1_yk/gnorm2_old; 605 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 606 } 607 break; 608 609 case CG_PolakRibierePlus: 610 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 611 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 612 ynorm2 = ynorm*ynorm; 613 if (!cg->diag_scaling) { 614 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 615 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 616 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha)); 617 beta = tau_k*gkp1_yk/gnorm2_old; 618 beta = PetscMax(beta, 0.0); 619 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 620 } else { 621 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */ 622 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 623 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 624 beta = gkp1_yk/gnorm2_old; 625 beta = PetscMax(beta, 0.0); 626 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 627 } 628 break; 629 630 case CG_DaiYuan: 631 /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property." 632 SIAM Journal on optimization 10, no. 1 (1999): 177-182. */ 633 if (!cg->diag_scaling) { 634 PetscCall(VecDot(tao->stepdirection, tao->gradient, &gd)); 635 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 636 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha)); 637 beta = tau_k*gnorm2/(gd - gd_old); 638 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 639 } else { 640 PetscCall(MatMult(cg->B, tao->stepdirection, cg->d_work)); 641 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 642 PetscCall(VecDot(cg->g_work, tao->gradient, >Dg)); 643 PetscCall(VecDot(tao->stepdirection, cg->G_old, &gd_old)); 644 PetscCall(VecDot(cg->d_work, cg->g_work, &dk_yk)); 645 dk_yk = dk_yk - gd_old; 646 beta = gtDg/dk_yk; 647 PetscCall(VecScale(cg->d_work, beta)); 648 PetscCall(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work)); 649 } 650 break; 651 652 case CG_HagerZhang: 653 /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent." 654 ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */ 655 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 656 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 657 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 658 snorm = dnorm*step; 659 cg->yts = step*dk_yk; 660 if (cg->use_dynamic_restart) { 661 PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 662 } 663 if (cg->dynamic_restart) { 664 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 665 } else { 666 if (!cg->diag_scaling) { 667 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 668 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 669 /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */ 670 tmp = gd/dk_yk; 671 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)); 672 /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */ 673 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 674 /* d <- -t*g + beta*t*d */ 675 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 676 } else { 677 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 678 cg->yty = ynorm2; 679 cg->sts = snorm*snorm; 680 /* Apply the diagonal scaling to all my vectors */ 681 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 682 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 683 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 684 /* Construct the constant ytDgkp1 */ 685 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 686 /* Construct the constant for scaling Dkyk in the update */ 687 tmp = gd/dk_yk; 688 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 689 tau_k = -tau_k*gd/(dk_yk*dk_yk); 690 /* beta is the constant which adds the dk contribution */ 691 beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */ 692 /* From HZ2013, modified to account for diagonal scaling*/ 693 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 694 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 695 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 696 /* Do the update */ 697 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 698 } 699 } 700 break; 701 702 case CG_DaiKou: 703 /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search." 704 SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */ 705 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 706 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 707 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 708 snorm = step*dnorm; 709 cg->yts = dk_yk*step; 710 if (!cg->diag_scaling) { 711 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 712 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 713 /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */ 714 tmp = gd/dk_yk; 715 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk; 716 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 717 /* d <- -t*g + beta*t*d */ 718 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk)); 719 } else { 720 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 721 cg->yty = ynorm2; 722 cg->sts = snorm*snorm; 723 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 724 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 725 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 726 /* Construct the constant ytDgkp1 */ 727 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 728 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 729 tau_k = tau_k*gd/(dk_yk*dk_yk); 730 tmp = gd/dk_yk; 731 /* beta is the constant which adds the dk contribution */ 732 beta = gkp1_yk/dk_yk - step*tmp - tau_k; 733 /* Update this for the last term in beta */ 734 PetscCall(VecDot(cg->y_work, tao->stepdirection, &dk_yk)); 735 beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */ 736 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 737 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 738 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 739 /* Do the update */ 740 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 741 } 742 break; 743 744 case CG_KouDai: 745 /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization." 746 Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */ 747 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 748 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 749 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 750 snorm = step*dnorm; 751 cg->yts = dk_yk*step; 752 if (cg->use_dynamic_restart) { 753 PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 754 } 755 if (cg->dynamic_restart) { 756 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 757 } else { 758 if (!cg->diag_scaling) { 759 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 760 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 761 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 762 if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) /* 0.1 is KD's zeta parameter */ 763 { 764 beta = cg->zeta*tau_k*gd/(dnorm*dnorm); 765 gamma = 0.0; 766 } else { 767 if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk; 768 /* This seems to be very effective when there's no tau_k scaling. 769 This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */ 770 else { 771 gamma = cg->xi*gd/dk_yk; 772 } 773 } 774 /* d <- -t*g + beta*t*d + t*tmp*yk */ 775 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk)); 776 } else { 777 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 778 cg->yty = ynorm2; 779 cg->sts = snorm*snorm; 780 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 781 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 782 /* Construct the constant ytDgkp1 */ 783 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1D_yk)); 784 /* Construct the constant for scaling Dkyk in the update */ 785 gamma = gd/dk_yk; 786 /* tau_k = -ytDy/(ytd)^2 * gd */ 787 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 788 tau_k = tau_k*gd/(dk_yk*dk_yk); 789 /* beta is the constant which adds the d_k contribution */ 790 beta = gkp1D_yk/dk_yk - step*gamma - tau_k; 791 /* Here is the requisite check */ 792 PetscCall(VecDot(tao->stepdirection, cg->g_work, &tmp)); 793 if (cg->neg_xi) { 794 /* modified KD implementation */ 795 if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk; 796 else { 797 gamma = cg->xi*gd/dk_yk; 798 } 799 if (beta < cg->zeta*tmp/(dnorm*dnorm)) { 800 beta = cg->zeta*tmp/(dnorm*dnorm); 801 gamma = 0.0; 802 } 803 } else { /* original KD 2015 implementation */ 804 if (beta < cg->zeta*tmp/(dnorm*dnorm)) { 805 beta = cg->zeta*tmp/(dnorm*dnorm); 806 gamma = 0.0; 807 } else { 808 gamma = cg->xi*gd/dk_yk; 809 } 810 } 811 /* Do the update in two steps */ 812 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 813 PetscCall(VecAXPY(tao->stepdirection, gamma, cg->y_work)); 814 } 815 } 816 break; 817 818 case CG_SSML_BFGS: 819 /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory." 820 Discussion Papers 269 (1977). */ 821 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 822 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 823 snorm = step*dnorm; 824 cg->yts = dk_yk*step; 825 cg->yty = ynorm2; 826 cg->sts = snorm*snorm; 827 if (!cg->diag_scaling) { 828 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 829 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 830 tmp = gd/dk_yk; 831 beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp; 832 /* d <- -t*g + beta*t*d + t*tmp*yk */ 833 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk)); 834 } else { 835 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 836 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 837 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 838 /* compute scalar gamma */ 839 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 840 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 841 gamma = gd/dk_yk; 842 /* Compute scalar beta */ 843 beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 844 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 845 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 846 } 847 break; 848 849 case CG_SSML_DFP: 850 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 851 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 852 snorm = step*dnorm; 853 cg->yts = dk_yk*step; 854 cg->yty = ynorm2; 855 cg->sts = snorm*snorm; 856 if (!cg->diag_scaling) { 857 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 858 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 859 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 860 tau_k = cg->dfp_scale*tau_k; 861 tmp = tau_k*gkp1_yk/cg->yty; 862 beta = -step*gd/dk_yk; 863 /* d <- -t*g + beta*d + tmp*yk */ 864 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 865 } else { 866 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */ 867 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 868 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 869 /* compute scalar gamma */ 870 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 871 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 872 gamma = (gkp1_yk/tmp); 873 /* Compute scalar beta */ 874 beta = -step*gd/dk_yk; 875 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 876 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 877 } 878 break; 879 880 case CG_SSML_BROYDEN: 881 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 882 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 883 snorm = step*dnorm; 884 cg->yts = step*dk_yk; 885 cg->yty = ynorm2; 886 cg->sts = snorm*snorm; 887 if (!cg->diag_scaling) { 888 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 889 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale)); 890 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale)); 891 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 892 tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp; 893 /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0, 894 it should reproduce the tau_dfp scaling. Same with dfp_scale. */ 895 tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/cg->yty; 896 beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 897 /* d <- -t*g + beta*d + tmp*yk */ 898 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 899 } else { 900 /* We have diagonal scaling enabled */ 901 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 902 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 903 /* compute scalar gamma */ 904 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 905 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 906 gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp); 907 /* Compute scalar beta */ 908 beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 909 /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 910 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 911 } 912 break; 913 914 default: 915 break; 916 917 } 918 } 919 PetscFunctionReturn(0); 920 } 921 922 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm) 923 { 924 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 925 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 926 PetscReal step=1.0,gnorm2,gd,dnorm=0.0; 927 PetscReal gnorm2_old,f_old,resnorm, gnorm_old; 928 PetscBool pcgd_fallback = PETSC_FALSE; 929 930 PetscFunctionBegin; 931 /* We are now going to perform a line search along the direction. */ 932 /* Store solution and gradient info before it changes */ 933 PetscCall(VecCopy(tao->solution, cg->X_old)); 934 PetscCall(VecCopy(tao->gradient, cg->G_old)); 935 PetscCall(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old)); 936 937 gnorm_old = gnorm; 938 gnorm2_old = gnorm_old*gnorm_old; 939 f_old = cg->f; 940 /* Perform bounded line search. If we are recycling a solution from a previous */ 941 /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */ 942 if (!(tao->recycle && 0 == tao->niter)) { 943 /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */ 944 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 945 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 946 PetscCall(TaoAddLineSearchCounts(tao)); 947 948 /* Check linesearch failure */ 949 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 950 ++cg->ls_fails; 951 if (cg->cg_type == CG_GradientDescent) { 952 /* Nothing left to do but fail out of the optimization */ 953 step = 0.0; 954 tao->reason = TAO_DIVERGED_LS_FAILURE; 955 } else { 956 /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */ 957 PetscCall(VecCopy(cg->X_old, tao->solution)); 958 PetscCall(VecCopy(cg->G_old, tao->gradient)); 959 PetscCall(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient)); 960 gnorm = gnorm_old; 961 gnorm2 = gnorm2_old; 962 cg->f = f_old; 963 964 /* Fall back on preconditioned CG (so long as you're not already using it) */ 965 if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling) { 966 pcgd_fallback = PETSC_TRUE; 967 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 968 969 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 970 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 971 972 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 973 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 974 PetscCall(TaoAddLineSearchCounts(tao)); 975 976 pcgd_fallback = PETSC_FALSE; 977 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 978 /* Going to perform a regular gradient descent step. */ 979 ++cg->ls_fails; 980 step = 0.0; 981 } 982 } 983 /* Fall back on the scaled gradient step */ 984 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 985 ++cg->ls_fails; 986 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 987 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 988 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 989 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 990 PetscCall(TaoAddLineSearchCounts(tao)); 991 } 992 993 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 994 /* Nothing left to do but fail out of the optimization */ 995 ++cg->ls_fails; 996 step = 0.0; 997 tao->reason = TAO_DIVERGED_LS_FAILURE; 998 } else { 999 /* One of the fallbacks worked. Set them both back equal to false. */ 1000 pcgd_fallback = PETSC_FALSE; 1001 } 1002 } 1003 } 1004 /* Convergence test for line search failure */ 1005 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1006 1007 /* Standard convergence test */ 1008 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 1009 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 1010 PetscCheck(!PetscIsInfOrNanReal(resnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 1011 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 1012 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 1013 PetscCall((*tao->ops->convergencetest)(tao,tao->cnvP)); 1014 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1015 } 1016 /* Assert we have an updated step and we need at least one more iteration. */ 1017 /* Calculate the next direction */ 1018 /* Estimate the active set at the new solution */ 1019 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 1020 /* Compute the projected gradient and its norm */ 1021 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 1022 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 1023 PetscCall(VecNorm(tao->gradient,NORM_2,&gnorm)); 1024 gnorm2 = gnorm*gnorm; 1025 1026 /* Calculate some quantities used in the StepDirectionUpdate. */ 1027 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 1028 /* Update the step direction. */ 1029 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 1030 ++tao->niter; 1031 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 1032 1033 if (cg->cg_type != CG_GradientDescent) { 1034 /* Figure out which previously active variables became inactive this iteration */ 1035 PetscCall(ISDestroy(&cg->new_inactives)); 1036 if (cg->inactive_idx && cg->inactive_old) { 1037 PetscCall(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives)); 1038 } 1039 /* Selectively reset the CG step those freshly inactive variables */ 1040 if (cg->new_inactives) { 1041 PetscCall(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 1042 PetscCall(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 1043 PetscCall(VecCopy(cg->inactive_grad, cg->inactive_step)); 1044 PetscCall(VecScale(cg->inactive_step, -1.0)); 1045 PetscCall(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 1046 PetscCall(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 1047 } 1048 /* Verify that this is a descent direction */ 1049 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 1050 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 1051 if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) { 1052 /* Not a descent direction, so we reset back to projected gradient descent */ 1053 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 1054 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 1055 ++cg->descent_error; 1056 } else { 1057 } 1058 } 1059 PetscFunctionReturn(0); 1060 } 1061 1062 PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0) 1063 { 1064 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 1065 1066 PetscFunctionBegin; 1067 PetscCall(PetscObjectReference((PetscObject)H0)); 1068 cg->pc = H0; 1069 PetscFunctionReturn(0); 1070 } 1071