1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/ 3 #include <petscksp.h> 4 5 #define CG_GradientDescent 0 6 #define CG_HestenesStiefel 1 7 #define CG_FletcherReeves 2 8 #define CG_PolakRibierePolyak 3 9 #define CG_PolakRibierePlus 4 10 #define CG_DaiYuan 5 11 #define CG_HagerZhang 6 12 #define CG_DaiKou 7 13 #define CG_KouDai 8 14 #define CG_SSML_BFGS 9 15 #define CG_SSML_DFP 10 16 #define CG_SSML_BROYDEN 11 17 #define CG_PCGradientDescent 12 18 #define CGTypes 13 19 20 static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"}; 21 22 #define CG_AS_NONE 0 23 #define CG_AS_BERTSEKAS 1 24 #define CG_AS_SIZE 2 25 26 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 27 28 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) 29 { 30 PetscErrorCode ierr; 31 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 32 33 PetscFunctionBegin; 34 CHKERRQ(ISDestroy(&cg->inactive_old)); 35 if (cg->inactive_idx) { 36 CHKERRQ(ISDuplicate(cg->inactive_idx, &cg->inactive_old)); 37 CHKERRQ(ISCopy(cg->inactive_idx, cg->inactive_old)); 38 } 39 switch (asType) { 40 case CG_AS_NONE: 41 CHKERRQ(ISDestroy(&cg->inactive_idx)); 42 CHKERRQ(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx)); 43 CHKERRQ(ISDestroy(&cg->active_idx)); 44 CHKERRQ(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx)); 45 break; 46 47 case CG_AS_BERTSEKAS: 48 /* Use gradient descent to estimate the active set */ 49 CHKERRQ(VecCopy(cg->unprojected_gradient, cg->W)); 50 CHKERRQ(VecScale(cg->W, -1.0)); 51 ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, 52 &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr); 53 break; 54 55 default: 56 break; 57 } 58 PetscFunctionReturn(0); 59 } 60 61 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step) 62 { 63 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 64 65 PetscFunctionBegin; 66 switch (asType) { 67 case CG_AS_NONE: 68 CHKERRQ(VecISSet(step, cg->active_idx, 0.0)); 69 break; 70 71 case CG_AS_BERTSEKAS: 72 CHKERRQ(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step)); 73 break; 74 75 default: 76 break; 77 } 78 PetscFunctionReturn(0); 79 } 80 81 static PetscErrorCode TaoSolve_BNCG(Tao tao) 82 { 83 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 84 PetscReal step=1.0,gnorm,gnorm2, resnorm; 85 PetscInt nDiff; 86 87 PetscFunctionBegin; 88 /* Project the current point onto the feasible set */ 89 CHKERRQ(TaoComputeVariableBounds(tao)); 90 CHKERRQ(TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU)); 91 92 /* Project the initial point onto the feasible region */ 93 CHKERRQ(TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution)); 94 95 if (nDiff > 0 || !tao->recycle) { 96 CHKERRQ(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); 97 } 98 CHKERRQ(VecNorm(cg->unprojected_gradient,NORM_2,&gnorm)); 99 PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 100 101 /* Estimate the active set and compute the projected gradient */ 102 CHKERRQ(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 103 104 /* Project the gradient and calculate the norm */ 105 CHKERRQ(VecCopy(cg->unprojected_gradient, tao->gradient)); 106 CHKERRQ(VecISSet(tao->gradient, cg->active_idx, 0.0)); 107 CHKERRQ(VecNorm(tao->gradient,NORM_2,&gnorm)); 108 gnorm2 = gnorm*gnorm; 109 110 /* Initialize counters */ 111 tao->niter = 0; 112 cg->ls_fails = cg->descent_error = 0; 113 cg->resets = -1; 114 cg->skipped_updates = cg->pure_gd_steps = 0; 115 cg->iter_quad = 0; 116 117 /* Convergence test at the starting point. */ 118 tao->reason = TAO_CONTINUE_ITERATING; 119 120 CHKERRQ(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 121 CHKERRQ(VecNorm(cg->W, NORM_2, &resnorm)); 122 PetscCheck(!PetscIsInfOrNanReal(resnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 123 CHKERRQ(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 124 CHKERRQ(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 125 CHKERRQ((*tao->ops->convergencetest)(tao,tao->cnvP)); 126 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 127 /* Calculate initial direction. */ 128 if (!tao->recycle) { 129 /* We are not recycling a solution/history from a past TaoSolve */ 130 CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2)); 131 } 132 /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */ 133 while (1) { 134 /* Call general purpose update function */ 135 if (tao->ops->update) { 136 CHKERRQ((*tao->ops->update)(tao, tao->niter, tao->user_update)); 137 } 138 CHKERRQ(TaoBNCGConductIteration(tao, gnorm)); 139 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 140 } 141 } 142 143 static PetscErrorCode TaoSetUp_BNCG(Tao tao) 144 { 145 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 146 147 PetscFunctionBegin; 148 if (!tao->gradient) { 149 CHKERRQ(VecDuplicate(tao->solution,&tao->gradient)); 150 } 151 if (!tao->stepdirection) { 152 CHKERRQ(VecDuplicate(tao->solution,&tao->stepdirection)); 153 } 154 if (!cg->W) { 155 CHKERRQ(VecDuplicate(tao->solution,&cg->W)); 156 } 157 if (!cg->work) { 158 CHKERRQ(VecDuplicate(tao->solution,&cg->work)); 159 } 160 if (!cg->sk) { 161 CHKERRQ(VecDuplicate(tao->solution,&cg->sk)); 162 } 163 if (!cg->yk) { 164 CHKERRQ(VecDuplicate(tao->gradient,&cg->yk)); 165 } 166 if (!cg->X_old) { 167 CHKERRQ(VecDuplicate(tao->solution,&cg->X_old)); 168 } 169 if (!cg->G_old) { 170 CHKERRQ(VecDuplicate(tao->gradient,&cg->G_old)); 171 } 172 if (cg->diag_scaling) { 173 CHKERRQ(VecDuplicate(tao->solution,&cg->d_work)); 174 CHKERRQ(VecDuplicate(tao->solution,&cg->y_work)); 175 CHKERRQ(VecDuplicate(tao->solution,&cg->g_work)); 176 } 177 if (!cg->unprojected_gradient) { 178 CHKERRQ(VecDuplicate(tao->gradient,&cg->unprojected_gradient)); 179 } 180 if (!cg->unprojected_gradient_old) { 181 CHKERRQ(VecDuplicate(tao->gradient,&cg->unprojected_gradient_old)); 182 } 183 CHKERRQ(MatLMVMAllocate(cg->B, cg->sk, cg->yk)); 184 if (cg->pc) { 185 CHKERRQ(MatLMVMSetJ0(cg->B, cg->pc)); 186 } 187 PetscFunctionReturn(0); 188 } 189 190 static PetscErrorCode TaoDestroy_BNCG(Tao tao) 191 { 192 TAO_BNCG *cg = (TAO_BNCG*) tao->data; 193 194 PetscFunctionBegin; 195 if (tao->setupcalled) { 196 CHKERRQ(VecDestroy(&cg->W)); 197 CHKERRQ(VecDestroy(&cg->work)); 198 CHKERRQ(VecDestroy(&cg->X_old)); 199 CHKERRQ(VecDestroy(&cg->G_old)); 200 CHKERRQ(VecDestroy(&cg->unprojected_gradient)); 201 CHKERRQ(VecDestroy(&cg->unprojected_gradient_old)); 202 CHKERRQ(VecDestroy(&cg->g_work)); 203 CHKERRQ(VecDestroy(&cg->d_work)); 204 CHKERRQ(VecDestroy(&cg->y_work)); 205 CHKERRQ(VecDestroy(&cg->sk)); 206 CHKERRQ(VecDestroy(&cg->yk)); 207 } 208 CHKERRQ(ISDestroy(&cg->active_lower)); 209 CHKERRQ(ISDestroy(&cg->active_upper)); 210 CHKERRQ(ISDestroy(&cg->active_fixed)); 211 CHKERRQ(ISDestroy(&cg->active_idx)); 212 CHKERRQ(ISDestroy(&cg->inactive_idx)); 213 CHKERRQ(ISDestroy(&cg->inactive_old)); 214 CHKERRQ(ISDestroy(&cg->new_inactives)); 215 CHKERRQ(MatDestroy(&cg->B)); 216 if (cg->pc) { 217 CHKERRQ(MatDestroy(&cg->pc)); 218 } 219 CHKERRQ(PetscFree(tao->data)); 220 PetscFunctionReturn(0); 221 } 222 223 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao) 224 { 225 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 226 227 PetscFunctionBegin; 228 CHKERRQ(PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization")); 229 CHKERRQ(PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL)); 230 if (cg->cg_type != CG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */ 231 if (CG_GradientDescent == cg->cg_type) { 232 cg->cg_type = CG_PCGradientDescent; 233 /* Set scaling equal to none or, at best, scalar scaling. */ 234 cg->unscaled_restart = PETSC_TRUE; 235 cg->diag_scaling = PETSC_FALSE; 236 } 237 CHKERRQ(PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL)); 238 CHKERRQ(PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL)); 239 CHKERRQ(PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL)); 240 CHKERRQ(PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL)); 241 CHKERRQ(PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL)); 242 CHKERRQ(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL)); 243 CHKERRQ(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL)); 244 CHKERRQ(PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL)); 245 CHKERRQ(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL)); 246 CHKERRQ(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL)); 247 CHKERRQ(PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL)); 248 CHKERRQ(PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL)); 249 CHKERRQ(PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL)); 250 CHKERRQ(PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL)); 251 CHKERRQ(PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL)); 252 CHKERRQ(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL)); 253 CHKERRQ(PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL)); 254 CHKERRQ(PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL)); 255 if (cg->no_scaling) { 256 cg->diag_scaling = PETSC_FALSE; 257 cg->alpha = -1.0; 258 } 259 if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling) { /* Some more default options that appear to be good. */ 260 cg->neg_xi = PETSC_TRUE; 261 } 262 CHKERRQ(PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL)); 263 CHKERRQ(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL)); 264 CHKERRQ(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL)); 265 CHKERRQ(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL)); 266 CHKERRQ(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL)); 267 268 CHKERRQ(PetscOptionsTail()); 269 CHKERRQ(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix)); 270 CHKERRQ(MatAppendOptionsPrefix(cg->B, "tao_bncg_")); 271 CHKERRQ(MatSetFromOptions(cg->B)); 272 PetscFunctionReturn(0); 273 } 274 275 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) 276 { 277 PetscBool isascii; 278 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 279 280 PetscFunctionBegin; 281 CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 282 if (isascii) { 283 CHKERRQ(PetscViewerASCIIPushTab(viewer)); 284 CHKERRQ(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type])); 285 CHKERRQ(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %i\n", cg->skipped_updates)); 286 CHKERRQ(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %i\n", cg->resets)); 287 CHKERRQ(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %i\n", cg->pure_gd_steps)); 288 CHKERRQ(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %i\n", cg->descent_error)); 289 CHKERRQ(PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails)); 290 if (cg->diag_scaling) { 291 CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii)); 292 if (isascii) { 293 CHKERRQ(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 294 CHKERRQ(MatView(cg->B, viewer)); 295 CHKERRQ(PetscViewerPopFormat(viewer)); 296 } 297 } 298 CHKERRQ(PetscViewerASCIIPopTab(viewer)); 299 } 300 PetscFunctionReturn(0); 301 } 302 303 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha) 304 { 305 PetscReal a, b, c, sig1, sig2; 306 307 PetscFunctionBegin; 308 *scale = 0.0; 309 if (1.0 == alpha) *scale = yts/yty; 310 else if (0.0 == alpha) *scale = sts/yts; 311 else if (-1.0 == alpha) *scale = 1.0; 312 else { 313 a = yty; 314 b = yts; 315 c = sts; 316 a *= alpha; 317 b *= -(2.0*alpha - 1.0); 318 c *= alpha - 1.0; 319 sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 320 sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 321 /* accept the positive root as the scalar */ 322 if (sig1 > 0.0) *scale = sig1; 323 else if (sig2 > 0.0) *scale = sig2; 324 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar"); 325 } 326 PetscFunctionReturn(0); 327 } 328 329 /*MC 330 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 331 332 Options Database Keys: 333 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled) 334 . -tao_bncg_eta <r> - restart tolerance 335 . -tao_bncg_type <taocg_type> - cg formula 336 . -tao_bncg_as_type <none,bertsekas> - active set estimation method 337 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation 338 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation 339 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision. 340 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method 341 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 342 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 343 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method 344 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method 345 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method 346 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method 347 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True. 348 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods 349 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts 350 . -tao_bncg_zeta <r> - Scaling parameter in the KD method 351 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps 352 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps 353 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart 354 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem 355 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations 356 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults. 357 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions 358 359 Notes: 360 CG formulas are: 361 + "gd" - Gradient Descent 362 . "fr" - Fletcher-Reeves 363 . "pr" - Polak-Ribiere-Polyak 364 . "prp" - Polak-Ribiere-Plus 365 . "hs" - Hestenes-Steifel 366 . "dy" - Dai-Yuan 367 . "ssml_bfgs" - Self-Scaling Memoryless BFGS 368 . "ssml_dfp" - Self-Scaling Memoryless DFP 369 . "ssml_brdn" - Self-Scaling Memoryless Broyden 370 . "hz" - Hager-Zhang (CG_DESCENT 5.3) 371 . "dk" - Dai-Kou (2013) 372 - "kd" - Kou-Dai (2015) 373 374 Level: beginner 375 M*/ 376 377 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 378 { 379 TAO_BNCG *cg; 380 const char *morethuente_type = TAOLINESEARCHMT; 381 382 PetscFunctionBegin; 383 tao->ops->setup = TaoSetUp_BNCG; 384 tao->ops->solve = TaoSolve_BNCG; 385 tao->ops->view = TaoView_BNCG; 386 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 387 tao->ops->destroy = TaoDestroy_BNCG; 388 389 /* Override default settings (unless already changed) */ 390 if (!tao->max_it_changed) tao->max_it = 2000; 391 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 392 393 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 394 /* method. In particular, gtol should be less that 0.5; the value used in */ 395 /* Nocedal and Wright is 0.10. We use the default values for the */ 396 /* linesearch because it seems to work better. */ 397 CHKERRQ(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch)); 398 CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1)); 399 CHKERRQ(TaoLineSearchSetType(tao->linesearch, morethuente_type)); 400 CHKERRQ(TaoLineSearchUseTaoRoutines(tao->linesearch, tao)); 401 402 CHKERRQ(PetscNewLog(tao,&cg)); 403 tao->data = (void*)cg; 404 CHKERRQ(KSPInitializePackage()); 405 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B)); 406 CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1)); 407 CHKERRQ(MatSetType(cg->B, MATLMVMDIAGBROYDEN)); 408 409 cg->pc = NULL; 410 411 cg->dk_eta = 0.5; 412 cg->hz_eta = 0.4; 413 cg->dynamic_restart = PETSC_FALSE; 414 cg->unscaled_restart = PETSC_FALSE; 415 cg->no_scaling = PETSC_FALSE; 416 cg->delta_min = 1e-7; 417 cg->delta_max = 100; 418 cg->theta = 1.0; 419 cg->hz_theta = 1.0; 420 cg->dfp_scale = 1.0; 421 cg->bfgs_scale = 1.0; 422 cg->zeta = 0.1; 423 cg->min_quad = 6; 424 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 425 cg->xi = 1.0; 426 cg->neg_xi = PETSC_TRUE; 427 cg->spaced_restart = PETSC_FALSE; 428 cg->tol_quad = 1e-8; 429 cg->as_step = 0.001; 430 cg->as_tol = 0.001; 431 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/ 432 cg->as_type = CG_AS_BERTSEKAS; 433 cg->cg_type = CG_SSML_BFGS; 434 cg->alpha = 1.0; 435 cg->diag_scaling = PETSC_TRUE; 436 PetscFunctionReturn(0); 437 } 438 439 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) 440 { 441 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 442 PetscReal scaling; 443 444 PetscFunctionBegin; 445 ++cg->resets; 446 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 447 scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling)); 448 if (cg->unscaled_restart) { 449 scaling = 1.0; 450 ++cg->pure_gd_steps; 451 } 452 CHKERRQ(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient)); 453 /* Also want to reset our diagonal scaling with each restart */ 454 if (cg->diag_scaling) { 455 CHKERRQ(MatLMVMReset(cg->B, PETSC_FALSE)); 456 } 457 PetscFunctionReturn(0); 458 } 459 460 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) 461 { 462 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 463 PetscReal quadinterp; 464 465 PetscFunctionBegin; 466 if (cg->f < cg->min_quad/10) { 467 *dynrestart = PETSC_FALSE; 468 PetscFunctionReturn(0); 469 } /* just skip this since this strategy doesn't work well for functions near zero */ 470 quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old)); 471 if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad; 472 else { 473 cg->iter_quad = 0; 474 *dynrestart = PETSC_FALSE; 475 } 476 if (cg->iter_quad >= cg->min_quad) { 477 cg->iter_quad = 0; 478 *dynrestart = PETSC_TRUE; 479 } 480 PetscFunctionReturn(0); 481 } 482 483 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback) 484 { 485 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 486 PetscReal gamma = 1.0, tau_k, beta; 487 PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk=1.0, gd; 488 PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg; 489 PetscInt dim; 490 PetscBool cg_restart = PETSC_FALSE; 491 PetscFunctionBegin; 492 493 /* Local curvature check to see if we need to restart */ 494 if (tao->niter >= 1 || tao->recycle) { 495 CHKERRQ(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 496 CHKERRQ(VecNorm(cg->yk, NORM_2, &ynorm)); 497 ynorm2 = ynorm*ynorm; 498 CHKERRQ(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 499 if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON) { 500 cg_restart = PETSC_TRUE; 501 ++cg->skipped_updates; 502 } 503 if (cg->spaced_restart) { 504 CHKERRQ(VecGetSize(tao->gradient, &dim)); 505 if (tao->niter % (dim*cg->min_restart_num)) cg_restart = PETSC_TRUE; 506 } 507 } 508 /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */ 509 if (cg->spaced_restart) { 510 CHKERRQ(VecGetSize(tao->gradient, &dim)); 511 if (0 == tao->niter % (6*dim)) cg_restart = PETSC_TRUE; 512 } 513 /* Compute the diagonal scaling vector if applicable */ 514 if (cg->diag_scaling) { 515 CHKERRQ(MatLMVMUpdate(cg->B, tao->solution, tao->gradient)); 516 } 517 518 /* A note on diagonal scaling (to be added to paper): 519 For the FR, PR, PRP, and DY methods, the diagonally scaled versions 520 must be derived as a preconditioned CG method rather than as 521 a Hessian initialization like in the Broyden methods. */ 522 523 /* In that case, one writes the objective function as 524 f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k). 525 Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to 526 HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the 527 same under preconditioning. Note that A is diagonal, such that A^T = A. */ 528 529 /* This yields questions like what the dot product d_k^T y_k 530 should look like. HZ mistakenly treats that as the same under 531 preconditioning, but that is not necessarily true. */ 532 533 /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation, 534 we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}), 535 yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is 536 NOT the same if our preconditioning matrix is updated between iterations. 537 This same issue is found when considering dot products of the form g_{k+1}^T y_k. */ 538 539 /* Compute CG step direction */ 540 if (cg_restart) { 541 CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2)); 542 } else if (pcgd_fallback) { 543 /* Just like preconditioned CG */ 544 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 545 CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 546 } else if (ynorm2 > PETSC_MACHINE_EPSILON) { 547 switch (cg->cg_type) { 548 case CG_PCGradientDescent: 549 if (!cg->diag_scaling) { 550 if (!cg->no_scaling) { 551 cg->sts = step*step*dnorm*dnorm; 552 CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha)); 553 } else { 554 tau_k = 1.0; 555 ++cg->pure_gd_steps; 556 } 557 CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient)); 558 } else { 559 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 560 CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 561 } 562 break; 563 564 case CG_HestenesStiefel: 565 /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */ 566 if (!cg->diag_scaling) { 567 cg->sts = step*step*dnorm*dnorm; 568 CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 569 CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha)); 570 beta = tau_k*gkp1_yk/dk_yk; 571 CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 572 } else { 573 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 574 CHKERRQ(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 575 beta = gkp1_yk/dk_yk; 576 CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 577 } 578 break; 579 580 case CG_FletcherReeves: 581 CHKERRQ(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 582 CHKERRQ(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 583 CHKERRQ(VecNorm(cg->yk, NORM_2, &ynorm)); 584 ynorm2 = ynorm*ynorm; 585 CHKERRQ(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 586 if (!cg->diag_scaling) { 587 CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha)); 588 beta = tau_k*gnorm2/gnorm2_old; 589 CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 590 } else { 591 CHKERRQ(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */ 592 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 593 CHKERRQ(VecDot(tao->gradient, cg->g_work, &tmp)); 594 beta = tmp/gnorm2_old; 595 CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 596 } 597 break; 598 599 case CG_PolakRibierePolyak: 600 snorm = step*dnorm; 601 if (!cg->diag_scaling) { 602 CHKERRQ(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 603 CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 604 CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha)); 605 beta = tau_k*gkp1_yk/gnorm2_old; 606 CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 607 } else { 608 CHKERRQ(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); 609 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 610 CHKERRQ(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 611 beta = gkp1_yk/gnorm2_old; 612 CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 613 } 614 break; 615 616 case CG_PolakRibierePlus: 617 CHKERRQ(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 618 CHKERRQ(VecNorm(cg->yk, NORM_2, &ynorm)); 619 ynorm2 = ynorm*ynorm; 620 if (!cg->diag_scaling) { 621 CHKERRQ(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 622 CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 623 CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha)); 624 beta = tau_k*gkp1_yk/gnorm2_old; 625 beta = PetscMax(beta, 0.0); 626 CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 627 } else { 628 CHKERRQ(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */ 629 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 630 CHKERRQ(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 631 beta = gkp1_yk/gnorm2_old; 632 beta = PetscMax(beta, 0.0); 633 CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 634 } 635 break; 636 637 case CG_DaiYuan: 638 /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property." 639 SIAM Journal on optimization 10, no. 1 (1999): 177-182. */ 640 if (!cg->diag_scaling) { 641 CHKERRQ(VecDot(tao->stepdirection, tao->gradient, &gd)); 642 CHKERRQ(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 643 CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha)); 644 beta = tau_k*gnorm2/(gd - gd_old); 645 CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 646 } else { 647 CHKERRQ(MatMult(cg->B, tao->stepdirection, cg->d_work)); 648 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 649 CHKERRQ(VecDot(cg->g_work, tao->gradient, >Dg)); 650 CHKERRQ(VecDot(tao->stepdirection, cg->G_old, &gd_old)); 651 CHKERRQ(VecDot(cg->d_work, cg->g_work, &dk_yk)); 652 dk_yk = dk_yk - gd_old; 653 beta = gtDg/dk_yk; 654 CHKERRQ(VecScale(cg->d_work, beta)); 655 CHKERRQ(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work)); 656 } 657 break; 658 659 case CG_HagerZhang: 660 /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent." 661 ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */ 662 CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd)); 663 CHKERRQ(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 664 CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 665 snorm = dnorm*step; 666 cg->yts = step*dk_yk; 667 if (cg->use_dynamic_restart) { 668 CHKERRQ(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 669 } 670 if (cg->dynamic_restart) { 671 CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2)); 672 } else { 673 if (!cg->diag_scaling) { 674 CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 675 CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 676 /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */ 677 tmp = gd/dk_yk; 678 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)); 679 /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */ 680 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 681 /* d <- -t*g + beta*t*d */ 682 CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 683 } else { 684 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 685 cg->yty = ynorm2; 686 cg->sts = snorm*snorm; 687 /* Apply the diagonal scaling to all my vectors */ 688 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 689 CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work)); 690 CHKERRQ(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 691 /* Construct the constant ytDgkp1 */ 692 CHKERRQ(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 693 /* Construct the constant for scaling Dkyk in the update */ 694 tmp = gd/dk_yk; 695 CHKERRQ(VecDot(cg->yk, cg->y_work, &tau_k)); 696 tau_k = -tau_k*gd/(dk_yk*dk_yk); 697 /* beta is the constant which adds the dk contribution */ 698 beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */ 699 /* From HZ2013, modified to account for diagonal scaling*/ 700 CHKERRQ(VecDot(cg->G_old, cg->d_work, &gd_old)); 701 CHKERRQ(VecDot(tao->stepdirection, cg->g_work, &gd)); 702 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 703 /* Do the update */ 704 CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 705 } 706 } 707 break; 708 709 case CG_DaiKou: 710 /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search." 711 SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */ 712 CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd)); 713 CHKERRQ(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 714 CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 715 snorm = step*dnorm; 716 cg->yts = dk_yk*step; 717 if (!cg->diag_scaling) { 718 CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 719 CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 720 /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */ 721 tmp = gd/dk_yk; 722 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk; 723 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 724 /* d <- -t*g + beta*t*d */ 725 CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk)); 726 } else { 727 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 728 cg->yty = ynorm2; 729 cg->sts = snorm*snorm; 730 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 731 CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work)); 732 CHKERRQ(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 733 /* Construct the constant ytDgkp1 */ 734 CHKERRQ(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 735 CHKERRQ(VecDot(cg->yk, cg->y_work, &tau_k)); 736 tau_k = tau_k*gd/(dk_yk*dk_yk); 737 tmp = gd/dk_yk; 738 /* beta is the constant which adds the dk contribution */ 739 beta = gkp1_yk/dk_yk - step*tmp - tau_k; 740 /* Update this for the last term in beta */ 741 CHKERRQ(VecDot(cg->y_work, tao->stepdirection, &dk_yk)); 742 beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */ 743 CHKERRQ(VecDot(tao->stepdirection, cg->g_work, &gd)); 744 CHKERRQ(VecDot(cg->G_old, cg->d_work, &gd_old)); 745 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 746 /* Do the update */ 747 CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 748 } 749 break; 750 751 case CG_KouDai: 752 /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization." 753 Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */ 754 CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd)); 755 CHKERRQ(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 756 CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 757 snorm = step*dnorm; 758 cg->yts = dk_yk*step; 759 if (cg->use_dynamic_restart) { 760 CHKERRQ(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 761 } 762 if (cg->dynamic_restart) { 763 CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2)); 764 } else { 765 if (!cg->diag_scaling) { 766 CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 767 CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha)); 768 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 769 if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) /* 0.1 is KD's zeta parameter */ 770 { 771 beta = cg->zeta*tau_k*gd/(dnorm*dnorm); 772 gamma = 0.0; 773 } else { 774 if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk; 775 /* This seems to be very effective when there's no tau_k scaling. 776 This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */ 777 else { 778 gamma = cg->xi*gd/dk_yk; 779 } 780 } 781 /* d <- -t*g + beta*t*d + t*tmp*yk */ 782 CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk)); 783 } else { 784 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 785 cg->yty = ynorm2; 786 cg->sts = snorm*snorm; 787 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 788 CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work)); 789 /* Construct the constant ytDgkp1 */ 790 CHKERRQ(VecDot(cg->yk, cg->g_work, &gkp1D_yk)); 791 /* Construct the constant for scaling Dkyk in the update */ 792 gamma = gd/dk_yk; 793 /* tau_k = -ytDy/(ytd)^2 * gd */ 794 CHKERRQ(VecDot(cg->yk, cg->y_work, &tau_k)); 795 tau_k = tau_k*gd/(dk_yk*dk_yk); 796 /* beta is the constant which adds the d_k contribution */ 797 beta = gkp1D_yk/dk_yk - step*gamma - tau_k; 798 /* Here is the requisite check */ 799 CHKERRQ(VecDot(tao->stepdirection, cg->g_work, &tmp)); 800 if (cg->neg_xi) { 801 /* modified KD implementation */ 802 if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk; 803 else { 804 gamma = cg->xi*gd/dk_yk; 805 } 806 if (beta < cg->zeta*tmp/(dnorm*dnorm)) { 807 beta = cg->zeta*tmp/(dnorm*dnorm); 808 gamma = 0.0; 809 } 810 } else { /* original KD 2015 implementation */ 811 if (beta < cg->zeta*tmp/(dnorm*dnorm)) { 812 beta = cg->zeta*tmp/(dnorm*dnorm); 813 gamma = 0.0; 814 } else { 815 gamma = cg->xi*gd/dk_yk; 816 } 817 } 818 /* Do the update in two steps */ 819 CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 820 CHKERRQ(VecAXPY(tao->stepdirection, gamma, cg->y_work)); 821 } 822 } 823 break; 824 825 case CG_SSML_BFGS: 826 /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory." 827 Discussion Papers 269 (1977). */ 828 CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd)); 829 CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 830 snorm = step*dnorm; 831 cg->yts = dk_yk*step; 832 cg->yty = ynorm2; 833 cg->sts = snorm*snorm; 834 if (!cg->diag_scaling) { 835 CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 836 CHKERRQ(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 837 tmp = gd/dk_yk; 838 beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp; 839 /* d <- -t*g + beta*t*d + t*tmp*yk */ 840 CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk)); 841 } else { 842 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 843 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 844 CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work)); 845 /* compute scalar gamma */ 846 CHKERRQ(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 847 CHKERRQ(VecDot(cg->y_work, cg->yk, &tmp)); 848 gamma = gd/dk_yk; 849 /* Compute scalar beta */ 850 beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 851 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 852 CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 853 } 854 break; 855 856 case CG_SSML_DFP: 857 CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd)); 858 CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 859 snorm = step*dnorm; 860 cg->yts = dk_yk*step; 861 cg->yty = ynorm2; 862 cg->sts = snorm*snorm; 863 if (!cg->diag_scaling) { 864 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 865 CHKERRQ(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 866 CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 867 tau_k = cg->dfp_scale*tau_k; 868 tmp = tau_k*gkp1_yk/cg->yty; 869 beta = -step*gd/dk_yk; 870 /* d <- -t*g + beta*d + tmp*yk */ 871 CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 872 } else { 873 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */ 874 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 875 CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work)); 876 /* compute scalar gamma */ 877 CHKERRQ(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 878 CHKERRQ(VecDot(cg->y_work, cg->yk, &tmp)); 879 gamma = (gkp1_yk/tmp); 880 /* Compute scalar beta */ 881 beta = -step*gd/dk_yk; 882 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 883 CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 884 } 885 break; 886 887 case CG_SSML_BROYDEN: 888 CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd)); 889 CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 890 snorm = step*dnorm; 891 cg->yts = step*dk_yk; 892 cg->yty = ynorm2; 893 cg->sts = snorm*snorm; 894 if (!cg->diag_scaling) { 895 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 896 CHKERRQ(TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale)); 897 CHKERRQ(TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale)); 898 CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 899 tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp; 900 /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0, 901 it should reproduce the tau_dfp scaling. Same with dfp_scale. */ 902 tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/cg->yty; 903 beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 904 /* d <- -t*g + beta*d + tmp*yk */ 905 CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 906 } else { 907 /* We have diagonal scaling enabled */ 908 CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work)); 909 CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work)); 910 /* compute scalar gamma */ 911 CHKERRQ(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 912 CHKERRQ(VecDot(cg->y_work, cg->yk, &tmp)); 913 gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp); 914 /* Compute scalar beta */ 915 beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 916 /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 917 CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 918 } 919 break; 920 921 default: 922 break; 923 924 } 925 } 926 PetscFunctionReturn(0); 927 } 928 929 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm) 930 { 931 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 932 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 933 PetscReal step=1.0,gnorm2,gd,dnorm=0.0; 934 PetscReal gnorm2_old,f_old,resnorm, gnorm_old; 935 PetscBool pcgd_fallback = PETSC_FALSE; 936 937 PetscFunctionBegin; 938 /* We are now going to perform a line search along the direction. */ 939 /* Store solution and gradient info before it changes */ 940 CHKERRQ(VecCopy(tao->solution, cg->X_old)); 941 CHKERRQ(VecCopy(tao->gradient, cg->G_old)); 942 CHKERRQ(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old)); 943 944 gnorm_old = gnorm; 945 gnorm2_old = gnorm_old*gnorm_old; 946 f_old = cg->f; 947 /* Perform bounded line search. If we are recycling a solution from a previous */ 948 /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */ 949 if (!(tao->recycle && 0 == tao->niter)) { 950 /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */ 951 CHKERRQ(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 952 CHKERRQ(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 953 CHKERRQ(TaoAddLineSearchCounts(tao)); 954 955 /* Check linesearch failure */ 956 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 957 ++cg->ls_fails; 958 if (cg->cg_type == CG_GradientDescent) { 959 /* Nothing left to do but fail out of the optimization */ 960 step = 0.0; 961 tao->reason = TAO_DIVERGED_LS_FAILURE; 962 } else { 963 /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */ 964 CHKERRQ(VecCopy(cg->X_old, tao->solution)); 965 CHKERRQ(VecCopy(cg->G_old, tao->gradient)); 966 CHKERRQ(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient)); 967 gnorm = gnorm_old; 968 gnorm2 = gnorm2_old; 969 cg->f = f_old; 970 971 /* Fall back on preconditioned CG (so long as you're not already using it) */ 972 if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling) { 973 pcgd_fallback = PETSC_TRUE; 974 CHKERRQ(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 975 976 CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2)); 977 CHKERRQ(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 978 979 CHKERRQ(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 980 CHKERRQ(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 981 CHKERRQ(TaoAddLineSearchCounts(tao)); 982 983 pcgd_fallback = PETSC_FALSE; 984 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 985 /* Going to perform a regular gradient descent step. */ 986 ++cg->ls_fails; 987 step = 0.0; 988 } 989 } 990 /* Fall back on the scaled gradient step */ 991 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 992 ++cg->ls_fails; 993 CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2)); 994 CHKERRQ(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 995 CHKERRQ(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 996 CHKERRQ(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 997 CHKERRQ(TaoAddLineSearchCounts(tao)); 998 } 999 1000 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 1001 /* Nothing left to do but fail out of the optimization */ 1002 ++cg->ls_fails; 1003 step = 0.0; 1004 tao->reason = TAO_DIVERGED_LS_FAILURE; 1005 } else { 1006 /* One of the fallbacks worked. Set them both back equal to false. */ 1007 pcgd_fallback = PETSC_FALSE; 1008 } 1009 } 1010 } 1011 /* Convergence test for line search failure */ 1012 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1013 1014 /* Standard convergence test */ 1015 CHKERRQ(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 1016 CHKERRQ(VecNorm(cg->W, NORM_2, &resnorm)); 1017 PetscCheck(!PetscIsInfOrNanReal(resnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 1018 CHKERRQ(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 1019 CHKERRQ(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 1020 CHKERRQ((*tao->ops->convergencetest)(tao,tao->cnvP)); 1021 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1022 } 1023 /* Assert we have an updated step and we need at least one more iteration. */ 1024 /* Calculate the next direction */ 1025 /* Estimate the active set at the new solution */ 1026 CHKERRQ(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 1027 /* Compute the projected gradient and its norm */ 1028 CHKERRQ(VecCopy(cg->unprojected_gradient, tao->gradient)); 1029 CHKERRQ(VecISSet(tao->gradient, cg->active_idx, 0.0)); 1030 CHKERRQ(VecNorm(tao->gradient,NORM_2,&gnorm)); 1031 gnorm2 = gnorm*gnorm; 1032 1033 /* Calculate some quantities used in the StepDirectionUpdate. */ 1034 CHKERRQ(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 1035 /* Update the step direction. */ 1036 CHKERRQ(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 1037 ++tao->niter; 1038 CHKERRQ(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 1039 1040 if (cg->cg_type != CG_GradientDescent) { 1041 /* Figure out which previously active variables became inactive this iteration */ 1042 CHKERRQ(ISDestroy(&cg->new_inactives)); 1043 if (cg->inactive_idx && cg->inactive_old) { 1044 CHKERRQ(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives)); 1045 } 1046 /* Selectively reset the CG step those freshly inactive variables */ 1047 if (cg->new_inactives) { 1048 CHKERRQ(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 1049 CHKERRQ(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 1050 CHKERRQ(VecCopy(cg->inactive_grad, cg->inactive_step)); 1051 CHKERRQ(VecScale(cg->inactive_step, -1.0)); 1052 CHKERRQ(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 1053 CHKERRQ(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 1054 } 1055 /* Verify that this is a descent direction */ 1056 CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd)); 1057 CHKERRQ(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 1058 if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) { 1059 /* Not a descent direction, so we reset back to projected gradient descent */ 1060 CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2)); 1061 CHKERRQ(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 1062 ++cg->descent_error; 1063 } else { 1064 } 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0) 1070 { 1071 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 1072 1073 PetscFunctionBegin; 1074 CHKERRQ(PetscObjectReference((PetscObject)H0)); 1075 cg->pc = H0; 1076 PetscFunctionReturn(0); 1077 } 1078