1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> 3 #include <petscksp.h> 4 5 #define CG_GradientDescent 0 6 #define CG_HestenesStiefel 1 7 #define CG_FletcherReeves 2 8 #define CG_PolakRibierePolyak 3 9 #define CG_PolakRibierePlus 4 10 #define CG_DaiYuan 5 11 #define CG_HagerZhang 6 12 #define CG_DaiKou 7 13 #define CG_KouDai 8 14 #define CG_SSML_BFGS 9 15 #define CG_SSML_DFP 10 16 #define CG_SSML_BROYDEN 11 17 #define CG_Types 12 18 19 static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn"}; 20 21 #define CG_AS_NONE 0 22 #define CG_AS_BERTSEKAS 1 23 #define CG_AS_SIZE 2 24 25 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 26 27 PetscErrorCode TaoBNCGSetRecycleFlag(Tao tao, PetscBool recycle) 28 { 29 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 30 31 PetscFunctionBegin; 32 cg->recycle = recycle; 33 PetscFunctionReturn(0); 34 } 35 36 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) 37 { 38 PetscErrorCode ierr; 39 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 40 41 PetscFunctionBegin; 42 ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr); 43 if (cg->inactive_idx) { 44 ierr = ISDuplicate(cg->inactive_idx, &cg->inactive_old);CHKERRQ(ierr); 45 ierr = ISCopy(cg->inactive_idx, cg->inactive_old);CHKERRQ(ierr); 46 } 47 switch (asType) { 48 case CG_AS_NONE: 49 ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr); 50 ierr = VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx);CHKERRQ(ierr); 51 ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr); 52 ierr = ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx);CHKERRQ(ierr); 53 break; 54 55 case CG_AS_BERTSEKAS: 56 /* Use gradient descent to estimate the active set */ 57 ierr = VecCopy(cg->unprojected_gradient, cg->W);CHKERRQ(ierr); 58 ierr = VecScale(cg->W, -1.0);CHKERRQ(ierr); 59 ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, 60 &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr); 61 break; 62 63 default: 64 break; 65 } 66 PetscFunctionReturn(0); 67 } 68 69 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step) 70 { 71 PetscErrorCode ierr; 72 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 73 74 PetscFunctionBegin; 75 switch (asType) { 76 case CG_AS_NONE: 77 ierr = VecISSet(step, cg->active_idx, 0.0);CHKERRQ(ierr); 78 break; 79 80 case CG_AS_BERTSEKAS: 81 ierr = TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step);CHKERRQ(ierr); 82 break; 83 84 default: 85 break; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 static PetscErrorCode TaoSolve_BNCG(Tao tao) 91 { 92 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 93 PetscErrorCode ierr; 94 PetscReal step=1.0,gnorm,gnorm2; 95 PetscInt nDiff; 96 97 PetscFunctionBegin; 98 /* Project the current point onto the feasible set */ 99 ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); 100 ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr); 101 102 /* Project the initial point onto the feasible region */ 103 ierr = TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution);CHKERRQ(ierr); 104 ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient);CHKERRQ(ierr); 105 ierr = VecNorm(cg->unprojected_gradient,NORM_2,&gnorm);CHKERRQ(ierr); 106 if (PetscIsInfOrNanReal(cg->f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); 107 108 /* Estimate the active set and compute the projected gradient */ 109 ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr); 110 111 /* Project the gradient and calculate the norm */ 112 ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr); 113 ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr); 114 ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); 115 gnorm2 = gnorm*gnorm; 116 117 /* Initialize counters */ 118 tao->niter = 0; 119 cg->ls_fails = cg->descent_error = 0; 120 cg->resets = -1; 121 cg->iter_quad = 0; 122 123 /* Convergence test at the starting point. */ 124 tao->reason = TAO_CONTINUE_ITERATING; 125 ierr = TaoLogConvergenceHistory(tao, cg->f, gnorm, 0.0, tao->ksp_its);CHKERRQ(ierr); 126 ierr = TaoMonitor(tao, tao->niter, cg->f, gnorm, 0.0, step);CHKERRQ(ierr); 127 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 128 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 129 130 /* Assert that we have not converged. Calculate initial direction. */ 131 /* This is where recycling goes. The outcome of this code must be */ 132 /* the direction that we will use. */ 133 if (cg->recycle) { 134 } 135 /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */ 136 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 137 138 while(1) { 139 ierr = TaoBNCGConductIteration(tao, gnorm);CHKERRQ(ierr); 140 /* Steffenson is currently an experimental (broken) feature. Do not use. */ 141 if (cg->use_steffenson) { 142 ierr = TaoBNCGSteffensonAcceleration(tao);CHKERRQ(ierr); 143 } 144 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 145 } 146 PetscFunctionReturn(0); 147 } 148 149 static PetscErrorCode TaoSetUp_BNCG(Tao tao) 150 { 151 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 152 PetscErrorCode ierr; 153 154 PetscFunctionBegin; 155 if (!tao->gradient) { 156 ierr = VecDuplicate(tao->solution,&tao->gradient);CHKERRQ(ierr); 157 } 158 if (!tao->stepdirection) { 159 ierr = VecDuplicate(tao->solution,&tao->stepdirection);CHKERRQ(ierr); 160 } 161 if (!cg->W) { 162 ierr = VecDuplicate(tao->solution,&cg->W);CHKERRQ(ierr); 163 } 164 if (!cg->work) { 165 ierr = VecDuplicate(tao->solution,&cg->work);CHKERRQ(ierr); 166 } 167 if (!cg->sk) { 168 ierr = VecDuplicate(tao->solution,&cg->sk);CHKERRQ(ierr); 169 } 170 if (!cg->yk) { 171 ierr = VecDuplicate(tao->gradient,&cg->yk);CHKERRQ(ierr); 172 } 173 if (!cg->X_old) { 174 ierr = VecDuplicate(tao->solution,&cg->X_old);CHKERRQ(ierr); 175 } 176 if (!cg->G_old) { 177 ierr = VecDuplicate(tao->gradient,&cg->G_old);CHKERRQ(ierr); 178 } 179 if (cg->use_steffenson){ 180 if (!cg->steffnm1) { 181 ierr = VecDuplicate(tao->solution, &cg->steffnm1);CHKERRQ(ierr); 182 } 183 if (!cg->steffn) { 184 ierr = VecDuplicate(tao->solution, &cg->steffn);CHKERRQ(ierr); 185 } 186 if (!cg->steffnp1) { 187 ierr = VecDuplicate(tao->solution, &cg->steffnp1);CHKERRQ(ierr); 188 } 189 if (!cg->steffva) { 190 ierr = VecDuplicate(tao->solution, &cg->steffva);CHKERRQ(ierr); 191 } 192 if (!cg->steffvatmp) { 193 ierr = VecDuplicate(tao->solution, &cg->steffvatmp);CHKERRQ(ierr); 194 } 195 } 196 if (cg->diag_scaling){ 197 ierr = VecDuplicate(tao->solution,&cg->d_work);CHKERRQ(ierr); 198 ierr = VecDuplicate(tao->solution,&cg->y_work);CHKERRQ(ierr); 199 ierr = VecDuplicate(tao->solution,&cg->g_work);CHKERRQ(ierr); 200 } 201 if (!cg->unprojected_gradient) { 202 ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient);CHKERRQ(ierr); 203 } 204 if (!cg->unprojected_gradient_old) { 205 ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient_old);CHKERRQ(ierr); 206 } 207 208 ierr = MatLMVMAllocate(cg->B, cg->sk, cg->yk);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode TaoDestroy_BNCG(Tao tao) 213 { 214 TAO_BNCG *cg = (TAO_BNCG*) tao->data; 215 PetscErrorCode ierr; 216 217 PetscFunctionBegin; 218 if (tao->setupcalled) { 219 ierr = VecDestroy(&cg->W);CHKERRQ(ierr); 220 ierr = VecDestroy(&cg->work);CHKERRQ(ierr); 221 ierr = VecDestroy(&cg->X_old);CHKERRQ(ierr); 222 ierr = VecDestroy(&cg->G_old);CHKERRQ(ierr); 223 ierr = VecDestroy(&cg->unprojected_gradient);CHKERRQ(ierr); 224 ierr = VecDestroy(&cg->unprojected_gradient_old);CHKERRQ(ierr); 225 ierr = VecDestroy(&cg->g_work);CHKERRQ(ierr); 226 ierr = VecDestroy(&cg->d_work);CHKERRQ(ierr); 227 ierr = VecDestroy(&cg->y_work);CHKERRQ(ierr); 228 ierr = VecDestroy(&cg->sk);CHKERRQ(ierr); 229 ierr = VecDestroy(&cg->yk);CHKERRQ(ierr); 230 ierr = MatDestroy(&cg->B);CHKERRQ(ierr); 231 } 232 ierr = ISDestroy(&cg->active_lower);CHKERRQ(ierr); 233 ierr = ISDestroy(&cg->active_upper);CHKERRQ(ierr); 234 ierr = ISDestroy(&cg->active_fixed);CHKERRQ(ierr); 235 ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr); 236 ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr); 237 ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr); 238 ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr); 239 240 ierr = PetscFree(tao->data);CHKERRQ(ierr); 241 PetscFunctionReturn(0); 242 } 243 244 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao) 245 { 246 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 247 PetscErrorCode ierr; 248 249 PetscFunctionBegin; 250 ierr = TaoLineSearchSetFromOptions(tao->linesearch);CHKERRQ(ierr); 251 ierr = PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization");CHKERRQ(ierr); 252 ierr = PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CG_Types, CG_Table[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr); 253 ierr = PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr); 254 255 ierr = PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL);CHKERRQ(ierr); 256 ierr = PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL);CHKERRQ(ierr); 257 ierr = PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL);CHKERRQ(ierr); 258 ierr = PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL);CHKERRQ(ierr); 259 ierr = PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL);CHKERRQ(ierr); 260 ierr = PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL);CHKERRQ(ierr); 261 ierr = PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL);CHKERRQ(ierr); 262 ierr = PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL);CHKERRQ(ierr); 263 ierr = PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL);CHKERRQ(ierr); 264 ierr = PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL);CHKERRQ(ierr); 265 ierr = PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL);CHKERRQ(ierr); 266 ierr = PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL);CHKERRQ(ierr); 267 ierr = PetscOptionsBool("-tao_bncg_use_steffenson","(developer - incomplete, do not use) use vector-Steffenson acceleration","",cg->use_steffenson,&cg->use_steffenson,NULL);CHKERRQ(ierr); 268 ierr = PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL);CHKERRQ(ierr); 269 ierr = PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL);CHKERRQ(ierr); 270 ierr = PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL);CHKERRQ(ierr); 271 ierr = PetscOptionsBool("-tao_bncg_recycle","(developer) enable recycling the existing solution and gradient at the start of a new solve","",cg->recycle,&cg->recycle,NULL);CHKERRQ(ierr); 272 ierr = PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL);CHKERRQ(ierr); 273 ierr = PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL);CHKERRQ(ierr); 274 ierr = PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL);CHKERRQ(ierr); 275 ierr = PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL);CHKERRQ(ierr); 276 277 ierr = PetscOptionsTail();CHKERRQ(ierr); 278 ierr = MatSetFromOptions(cg->B);CHKERRQ(ierr); 279 PetscFunctionReturn(0); 280 } 281 282 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) 283 { 284 PetscBool isascii; 285 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 286 PetscErrorCode ierr; 287 288 PetscFunctionBegin; 289 ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr); 290 if (isascii) { 291 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 292 ierr = PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]);CHKERRQ(ierr); 293 ierr = PetscViewerASCIIPrintf(viewer, "Resets: %i\n", cg->resets);CHKERRQ(ierr); 294 295 ierr = PetscViewerASCIIPrintf(viewer, " Not a descent dir.: %i\n", cg->descent_error);CHKERRQ(ierr); 296 ierr = PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails);CHKERRQ(ierr); 297 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 298 } 299 PetscFunctionReturn(0); 300 } 301 302 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha) 303 { 304 PetscReal a, b, c, sig1, sig2; 305 306 PetscFunctionBegin; 307 *scale = 0.0; 308 309 if (1.0 == alpha){ 310 *scale = yts/yty; 311 } else if (0.0 == alpha){ 312 *scale = sts/yts; 313 } 314 else if (-1.0 == alpha) *scale = 1.0; 315 else { 316 a = yty; 317 b = yts; 318 c = sts; 319 a *= alpha; 320 b *= -(2.0*alpha - 1.0); 321 c *= alpha - 1.0; 322 sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 323 sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 324 /* accept the positive root as the scalar */ 325 if (sig1 > 0.0) { 326 *scale = sig1; 327 } else if (sig2 > 0.0) { 328 *scale = sig2; 329 } else { 330 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar"); 331 } 332 } 333 PetscFunctionReturn(0); 334 } 335 336 /*MC 337 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 338 339 Options Database Keys: 340 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled) 341 . -tao_bncg_eta <r> - restart tolerance 342 . -tao_bncg_type <taocg_type> - cg formula 343 . -tao_bncg_as_type <none,bertsekas> - active set estimation method 344 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation 345 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation 346 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision. 347 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method 348 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 349 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 350 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method 351 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method 352 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method 353 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method 354 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True. 355 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods 356 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts 357 . -tao_bncg_use_steffenson <b> (not implemented) - use Steffenson acceleration 358 . -tao_bncg_zeta <r> - Scaling parameter in the KD method 359 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart 360 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem 361 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations 362 . -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions 363 364 Notes: 365 CG formulas are: 366 "gd" - Gradient Descent 367 "fr" - Fletcher-Reeves 368 "pr" - Polak-Ribiere-Polyak 369 "prp" - Polak-Ribiere-Plus 370 "hs" - Hestenes-Steifel 371 "dy" - Dai-Yuan 372 "ssml_bfgs" - Self-Scaling Memoryless BFGS 373 "ssml_dfp" - Self-Scaling Memoryless DFP 374 "ssml_brdn" - Self-Scaling Memoryless Broyden 375 "hz" - Hager-Zhang (CG_DESCENT 5.3) 376 "dk" - Dai-Kou (2013) 377 "kd" - Kou-Dai (2015) 378 Level: beginner 379 M*/ 380 381 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 382 { 383 TAO_BNCG *cg; 384 const char *morethuente_type = TAOLINESEARCHMT; 385 PetscErrorCode ierr; 386 387 PetscFunctionBegin; 388 tao->ops->setup = TaoSetUp_BNCG; 389 tao->ops->solve = TaoSolve_BNCG; 390 tao->ops->view = TaoView_BNCG; 391 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 392 tao->ops->destroy = TaoDestroy_BNCG; 393 394 /* Override default settings (unless already changed) */ 395 if (!tao->max_it_changed) tao->max_it = 2000; 396 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 397 398 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 399 /* method. In particular, gtol should be less that 0.5; the value used in */ 400 /* Nocedal and Wright is 0.10. We use the default values for the */ 401 /* linesearch because it seems to work better. */ 402 ierr = TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch);CHKERRQ(ierr); 403 ierr = PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1);CHKERRQ(ierr); 404 ierr = TaoLineSearchSetType(tao->linesearch, morethuente_type);CHKERRQ(ierr); 405 ierr = TaoLineSearchUseTaoRoutines(tao->linesearch, tao);CHKERRQ(ierr); 406 ierr = TaoLineSearchSetOptionsPrefix(tao->linesearch,tao->hdr.prefix);CHKERRQ(ierr); 407 408 ierr = PetscNewLog(tao,&cg);CHKERRQ(ierr); 409 tao->data = (void*)cg; 410 411 ierr = MatCreate(PetscObjectComm((PetscObject)tao), &cg->B);CHKERRQ(ierr); 412 ierr = PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1);CHKERRQ(ierr); 413 ierr = MatSetOptionsPrefix(cg->B, "tao_bncg_");CHKERRQ(ierr); 414 ierr = MatSetType(cg->B, MATLMVMDIAGBRDN);CHKERRQ(ierr); 415 416 cg->dk_eta = 0.5; 417 cg->hz_eta = 0.4; 418 cg->dynamic_restart = PETSC_FALSE; 419 cg->unscaled_restart = PETSC_FALSE; 420 cg->theta = 1.0; 421 cg->hz_theta = 1.0; 422 cg->dfp_scale = 1.0; 423 cg->bfgs_scale = 1.0; 424 cg->zeta = 0.1; 425 cg->min_quad = 6; 426 cg->use_steffenson = PETSC_FALSE; 427 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 428 cg->xi = 1.0; 429 cg->neg_xi = PETSC_TRUE; 430 cg->spaced_restart = PETSC_FALSE; 431 cg->tol_quad = 1e-8; 432 cg->as_step = 0.001; 433 cg->as_tol = 0.001; 434 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/ 435 cg->as_type = CG_AS_BERTSEKAS; 436 cg->cg_type = CG_SSML_BFGS; 437 cg->recycle = PETSC_FALSE; 438 cg->alpha = 1.0; 439 cg->diag_scaling = PETSC_TRUE; 440 PetscFunctionReturn(0); 441 } 442 443 444 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) 445 { 446 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 447 PetscErrorCode ierr; 448 PetscReal scaling; 449 450 PetscFunctionBegin; 451 ++cg->resets; 452 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 453 scaling = PetscMin(100.0, PetscMax(1e-7, scaling)); 454 if (cg->unscaled_restart) scaling = 1.0; 455 ierr = VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient);CHKERRQ(ierr); 456 /* Also want to reset our diagonal scaling with each restart */ 457 if (cg->diag_scaling) { 458 ierr = MatLMVMReset(cg->B, PETSC_FALSE);CHKERRQ(ierr); 459 } 460 PetscFunctionReturn(0); 461 } 462 463 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) 464 { 465 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 466 PetscReal quadinterp; 467 468 PetscFunctionBegin; 469 if (cg->f < cg->min_quad/10) { 470 *dynrestart = PETSC_FALSE; 471 PetscFunctionReturn(0); 472 } /* just skip this since this strategy doesn't work well for functions near zero */ 473 quadinterp = 2*(cg->f - fold)/(stepsize*(gd + gd_old)); 474 if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad; 475 else { 476 cg->iter_quad = 0; 477 *dynrestart = PETSC_FALSE; 478 } 479 if (cg->iter_quad >= cg->min_quad) { 480 cg->iter_quad = 0; 481 *dynrestart = PETSC_TRUE; 482 } 483 PetscFunctionReturn(0); 484 } 485 486 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscBool cg_restart, PetscReal dnorm, PetscReal ginner) 487 { 488 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 489 PetscErrorCode ierr; 490 PetscReal gamma = 1.0, tau_k, beta; 491 PetscReal tmp = 1.0, ynorm, ynorm2, snorm = 1.0, dk_yk=1.0, gd; 492 PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg; 493 PetscInt dim; 494 PetscFunctionBegin; 495 496 /* Local curvature check to see if we need to restart */ 497 if (tao->niter >= 1){ 498 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr); 499 ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr); 500 ynorm2 = ynorm*ynorm; 501 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr); 502 if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON) cg_restart = 1; 503 if (cg->spaced_restart) { 504 ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr); 505 if (tao->niter % (dim*cg->min_restart_num)) cg_restart = 1; 506 } 507 } 508 /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */ 509 if (cg->spaced_restart){ 510 ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr); 511 if (0 == tao->niter % (6*dim)) cg_restart = 1; 512 } 513 /* Compute the diagonal scaling vector if applicable */ 514 if (cg->diag_scaling) { 515 ierr = MatLMVMUpdate(cg->B, tao->solution, tao->gradient);CHKERRQ(ierr); 516 } 517 518 /* A note on diagonal scaling (to be added to paper): For the FR, PR, PRP, and DY methods, the diagonally scaled versions must be derived as a preconditioned CG method rather than as a Hessian initialization like in the Broyden methods. */ 519 520 /* In that case, one writes the objective function as f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k). Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the same under preconditioning. Note that A is diagonal, such that A^T = A. */ 521 522 /* This yields questions like what the dot product d_k^T y_k should look like. HZ mistakenly treats that as the same under preconditioning, but that is not necessarily true. */ 523 524 /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation, we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}), yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is NOT the same if our preconditioning matrix is updated between iterations. This same issue is found when considering dot products of the form g_{k+1}^T y_k. */ 525 526 /* As of now, for PR, PRP, and DY, the above considerations have not been fully taken into account, explaining their poorer performance. FR is implemented correctly, and yields some marginal improvement on performance. Working on the rest now. */ 527 528 /* Compute CG step direction */ 529 if (cg_restart) { 530 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 531 } else if (ynorm2 > PETSC_MACHINE_EPSILON) { 532 switch (cg->cg_type) { 533 case CG_GradientDescent: 534 if (!cg->diag_scaling){ 535 cg->sts = step*step*dnorm*dnorm; 536 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr); 537 ierr = VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient);CHKERRQ(ierr); 538 } else { 539 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 540 ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr); 541 } 542 break; 543 case CG_HestenesStiefel: 544 /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */ 545 if (!cg->diag_scaling){ 546 cg->sts = step*step*dnorm*dnorm; 547 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 548 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr); 549 beta = tau_k*gkp1_yk/dk_yk; 550 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 551 } else { 552 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 553 ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr); 554 beta = gkp1_yk/dk_yk; 555 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 556 } 557 break; 558 case CG_FletcherReeves: 559 ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr); 560 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr); 561 ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr); 562 ynorm2 = ynorm*ynorm; 563 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr); 564 if (!cg->diag_scaling){ 565 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha);CHKERRQ(ierr); 566 beta = tau_k*gnorm2/gnorm2_old; 567 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 568 } else { 569 ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Before it's updated */ 570 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 571 ierr = VecDot(tao->gradient, cg->g_work, &tmp);CHKERRQ(ierr); 572 beta = tmp/gnorm2_old; 573 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 574 } 575 break; 576 case CG_PolakRibierePolyak: 577 snorm = step*dnorm; 578 if (!cg->diag_scaling){ 579 ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr); 580 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 581 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr); 582 beta = tau_k*gkp1_yk/gnorm2_old; 583 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 584 } else { 585 ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); 586 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 587 ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr); 588 beta = gkp1_yk/gnorm2_old; 589 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 590 } 591 break; 592 case CG_PolakRibierePlus: 593 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr); 594 ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr); 595 ynorm2 = ynorm*ynorm; 596 if (!cg->diag_scaling){ 597 ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr); 598 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 599 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr); 600 beta = tau_k*gkp1_yk/gnorm2_old; 601 beta = PetscMax(beta, 0.0); 602 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 603 } else { 604 ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Old gtDg */ 605 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 606 ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr); 607 beta = gkp1_yk/gnorm2_old; 608 beta = PetscMax(beta, 0.0); 609 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 610 } 611 break; 612 case CG_DaiYuan: /* TODO: suspect this is broken in the diag part - test4 */ 613 if (!cg->diag_scaling){ 614 ierr = VecDot(tao->stepdirection, tao->gradient, &gd);CHKERRQ(ierr); 615 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 616 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha);CHKERRQ(ierr); 617 beta = tau_k*gnorm2/(gd - gd_old); 618 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 619 } else { 620 ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr); 621 ierr = MatMult(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 622 ierr = VecDot(cg->g_work, tao->gradient, >Dg);CHKERRQ(ierr); 623 ierr = VecDot(tao->stepdirection, cg->G_old, &gd_old);CHKERRQ(ierr); 624 ierr = VecDot(cg->d_work, cg->g_work, &dk_yk);CHKERRQ(ierr); 625 dk_yk = dk_yk - gd_old; 626 beta = gtDg/dk_yk; 627 ierr = VecScale(cg->d_work, beta); 628 ierr = VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work);CHKERRQ(ierr); 629 } 630 break; 631 case CG_SSML_BFGS: 632 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 633 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 634 snorm = step*dnorm; 635 cg->yts = dk_yk*step; 636 cg->yty = ynorm2; 637 cg->sts = snorm*snorm; 638 if (!cg->diag_scaling){ 639 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 640 ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr); 641 tmp = gd/dk_yk; 642 beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp; 643 /* d <- -t*g + beta*t*d + t*tmp*yk */ 644 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr); 645 } else { 646 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 647 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 648 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 649 /* compute scalar gamma */ 650 ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr); 651 ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr); 652 gamma = gd/dk_yk; 653 /* Compute scalar beta */ 654 beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 655 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 656 ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr); 657 } 658 break; 659 case CG_SSML_DFP: 660 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 661 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 662 snorm = step*dnorm; 663 cg->yts = dk_yk*step; 664 cg->yty = ynorm2; 665 cg->sts = snorm*snorm; 666 if (!cg->diag_scaling){ 667 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 668 ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr); 669 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 670 tau_k = cg->dfp_scale*tau_k; 671 tmp = tau_k*gkp1_yk/ynorm2; 672 beta = -step*gd/dk_yk; 673 /* d <- -t*g + beta*d + tmp*yk */ 674 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr); 675 } else { 676 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */ 677 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 678 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 679 /* compute scalar gamma */ 680 ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr); 681 ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr); 682 gamma = (gkp1_yk/tmp); 683 /* Compute scalar beta */ 684 beta = -step*gd/dk_yk; 685 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 686 ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr); 687 } 688 break; 689 case CG_SSML_BROYDEN: 690 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 691 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 692 snorm = step*dnorm; 693 cg->yts = step*dk_yk; 694 cg->yty = ynorm2; 695 cg->sts = snorm*snorm; 696 if (!cg->diag_scaling){ 697 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 698 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale);CHKERRQ(ierr); 699 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale);CHKERRQ(ierr); 700 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 701 tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp; 702 /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0, it should reproduce the tau_dfp scaling. Same with dfp_scale. */ 703 tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/ynorm2; 704 beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 705 /* d <- -t*g + beta*d + tmp*yk */ 706 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr); 707 } else { 708 /* We have diagonal scaling enabled */ 709 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 710 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 711 /* compute scalar gamma */ 712 ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr); 713 ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr); 714 gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp); 715 /* Compute scalar beta */ 716 beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 717 /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 718 ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr); 719 } 720 break; 721 case CG_HagerZhang: 722 /* Their 2006 paper. Comes from deleting the y_k term from SSML_BFGS, introducing a theta parameter, and using a cutoff for beta. See DK 2013 pg. 315 for a review of CG_DESCENT 5.3. One can use the dynamic restart strategy they implement, but it doesn't work well for us. */ 723 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 724 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 725 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 726 snorm = dnorm*step; 727 cg->yts = step*dk_yk; 728 if (cg->use_dynamic_restart){ 729 ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr); 730 } 731 if (cg->dynamic_restart){ 732 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 733 } else { 734 if (!cg->diag_scaling){ 735 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 736 ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr); 737 /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */ 738 tmp = gd/dk_yk; 739 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)); 740 /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */ 741 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 742 /* d <- -t*g + beta*t*d */ 743 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 744 } else { 745 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 746 cg->yty = ynorm2; 747 cg->sts = snorm*snorm; 748 /* Apply the diagonal scaling to all my vectors */ 749 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 750 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 751 ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr); 752 /* Construct the constant ytDgkp1 */ 753 ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr); 754 /* Construct the constant for scaling Dkyk in the update */ 755 tmp = gd/dk_yk; 756 /* tau_bfgs can be a parameter we tune via command line in future versions. For now, just setting to one. May instead put this inside the ComputeDiagonalScaling function... */ 757 cg->tau_bfgs = 1.0; 758 ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr); 759 tau_k = -tau_k*gd/(dk_yk*dk_yk); 760 /* beta is the constant which adds the dk contribution */ 761 beta = cg->tau_bfgs*gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */ 762 /* From HZ2013, modified to account for diagonal scaling*/ 763 ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr); 764 ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr); 765 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 766 /* Do the update */ 767 ierr = VecAXPBY(tao->stepdirection, -cg->tau_bfgs, beta, cg->g_work);CHKERRQ(ierr); 768 } 769 } 770 break; 771 case CG_DaiKou: 772 /* 2013 paper. */ 773 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 774 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 775 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 776 snorm = step*dnorm; 777 cg->yts = dk_yk*step; 778 if (!cg->diag_scaling){ 779 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 780 ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr); 781 /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */ 782 tmp = gd/dk_yk; 783 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk; 784 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 785 /* d <- -t*g + beta*t*d */ 786 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk);CHKERRQ(ierr); 787 } else { 788 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 789 cg->yty = ynorm2; 790 cg->sts = snorm*snorm; 791 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 792 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 793 ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr); 794 /* Construct the constant ytDgkp1 */ 795 ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr); 796 /* tau_bfgs can be a parameter we tune via command line in future versions. For now, just setting to one. May instead put this inside the ComputeDiagonalScaling function... */ 797 cg->tau_bfgs = 1.0; 798 ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr); 799 tau_k = tau_k*gd/(dk_yk*dk_yk); 800 tmp = gd/dk_yk; 801 /* beta is the constant which adds the dk contribution */ 802 beta = cg->tau_bfgs*gkp1_yk/dk_yk - step*tmp - tau_k; 803 /* Update this for the last term in beta */ 804 ierr = VecDot(cg->y_work, tao->stepdirection, &dk_yk);CHKERRQ(ierr); 805 beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */ 806 ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr); 807 ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr); 808 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 809 /* Do the update */ 810 ierr = VecAXPBY(tao->stepdirection, -cg->tau_bfgs, beta, cg->g_work);CHKERRQ(ierr); 811 } 812 break; 813 case CG_KouDai: 814 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 815 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 816 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 817 snorm = step*dnorm; 818 cg->yts = dk_yk*step; 819 if (cg->use_dynamic_restart){ 820 ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr); 821 } 822 if (cg->dynamic_restart){ 823 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 824 } else { 825 if (!cg->diag_scaling){ 826 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 827 ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr); 828 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 829 if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) /* 0.1 is KD's zeta parameter */ 830 { 831 beta = cg->zeta*tau_k*gd/(dnorm*dnorm); 832 gamma = 0.0; 833 } else { 834 if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk; 835 /* This seems to be very effective when there's no tau_k scaling. This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */ 836 else { 837 gamma = cg->xi*gd/dk_yk; 838 } 839 } 840 /* d <- -t*g + beta*t*d + t*tmp*yk */ 841 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr); 842 } else { 843 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 844 cg->yty = ynorm2; 845 cg->sts = snorm*snorm; 846 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 847 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 848 /* Construct the constant ytDgkp1 */ 849 ierr = VecDot(cg->yk, cg->g_work, &gkp1D_yk);CHKERRQ(ierr); 850 /* Construct the constant for scaling Dkyk in the update */ 851 gamma = gd/dk_yk; 852 /* tau_k = -ytDy/(ytd)^2 * gd */ 853 ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr); 854 tau_k = tau_k*gd/(dk_yk*dk_yk); 855 /* beta is the constant which adds the d_k contribution */ 856 beta = gkp1D_yk/dk_yk - step*gamma - tau_k; 857 /* Here is the requisite check */ 858 ierr = VecDot(tao->stepdirection, cg->g_work, &tmp);CHKERRQ(ierr); 859 if (cg->neg_xi){ 860 /* modified KD implementation */ 861 if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk; 862 else { 863 gamma = cg->xi*gd/dk_yk; 864 } 865 if (beta < cg->zeta*tmp/(dnorm*dnorm)){ 866 beta = cg->zeta*tmp/(dnorm*dnorm); 867 gamma = 0.0; 868 } 869 } else { /* original KD 2015 implementation */ 870 if (beta < cg->zeta*tmp/(dnorm*dnorm)) { 871 beta = cg->zeta*tmp/(dnorm*dnorm); 872 gamma = 0.0; 873 } else { 874 gamma = cg->xi*gd/dk_yk; 875 } 876 } 877 /* Do the update in two steps */ 878 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 879 ierr = VecAXPY(tao->stepdirection, gamma, cg->y_work);CHKERRQ(ierr); 880 } 881 } 882 break; 883 default: 884 beta = 0.0; 885 break; 886 } 887 } 888 cg_restart = 0; /* Will check in next iteration */ 889 PetscFunctionReturn(0); 890 } 891 892 PETSC_INTERN PetscErrorCode TaoBNCGSteffensonAcceleration(Tao tao) 893 { 894 /* Steffenson is currently an experimental (broken) feature. Do not use. */ 895 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 896 PetscErrorCode ierr; 897 PetscReal mag1, mag2; 898 PetscReal resnorm; 899 PetscReal steff_f; 900 PetscFunctionBegin; 901 if (tao->niter > 2 && tao->niter % 2 == 0){ 902 ierr = VecCopy(cg->steffnp1, cg->steffnm1);CHKERRQ(ierr); /* X_np1 to X_nm1 since it's been two iterations*/ 903 ierr = VecCopy(cg->X_old, cg->steffn);CHKERRQ(ierr); /* Get X_n */ 904 ierr = VecCopy(tao->solution, cg->steffnp1);CHKERRQ(ierr); 905 906 /* Begin step 4 */ 907 ierr = VecCopy(cg->steffnm1, cg->W);CHKERRQ(ierr); 908 ierr = VecAXPBY(cg->W, 1.0, -1.0, cg->steffn);CHKERRQ(ierr); 909 ierr = VecDot(cg->W, cg->W, &mag1);CHKERRQ(ierr); 910 ierr = VecAXPBYPCZ(cg->W, -1.0, 1.0, -1.0, cg->steffn, cg->steffnp1);CHKERRQ(ierr); 911 ierr = VecDot(cg->W, cg->W, &mag2);CHKERRQ(ierr); 912 ierr = VecCopy(cg->steffnm1, cg->steffva);CHKERRQ(ierr); 913 ierr = VecAXPY(cg->steffva, -mag1/mag2, cg->W);CHKERRQ(ierr); 914 915 ierr = TaoComputeObjectiveAndGradient(tao, cg->steffva, &steff_f, cg->g_work);CHKERRQ(ierr); 916 917 /* Check if the accelerated point has converged*/ 918 ierr = VecFischer(cg->steffva, cg->g_work, tao->XL, tao->XU, cg->W);CHKERRQ(ierr); 919 ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr); 920 if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); 921 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 922 ierr = PetscPrintf(PETSC_COMM_SELF, "va f: %20.19e\t va gnorm: %20.19e\n", steff_f, resnorm);CHKERRQ(ierr); 923 924 } else if (2 == tao->niter){ 925 ierr = VecCopy(tao->solution, cg->steffnp1);CHKERRQ(ierr); 926 mag1 = cg->sts; /* = |x1 - x0|^2 */ 927 ierr = VecCopy(cg->steffnm1, cg->steffvatmp);CHKERRQ(ierr); 928 ierr = VecAXPBYPCZ(cg->steffva, 1.0, -1.0, 0.0, cg->steffnp1, cg->steffn);CHKERRQ(ierr); 929 ierr = VecAXPBYPCZ(cg->steffva, 1.0, -1.0, 1.0, cg->steffnm1, cg->steffn);CHKERRQ(ierr); 930 ierr = VecNorm(cg->steffva, NORM_2, &mag2);CHKERRQ(ierr); 931 mag2 = mag2*mag2; 932 ierr = VecAXPBY(cg->steffva, 1.0, -mag1/mag2, cg->steffvatmp);CHKERRQ(ierr); 933 // finished step 2 934 } else if (1 == tao->niter){ 935 ierr = VecCopy(cg->X_old, cg->steffnm1);CHKERRQ(ierr); 936 ierr = VecCopy(tao->solution, cg->steffn);CHKERRQ(ierr); 937 } 938 /* Now have step 2 done of method */ 939 PetscFunctionReturn(0); 940 } 941 942 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm) 943 { 944 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 945 PetscErrorCode ierr; 946 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 947 PetscReal step=1.0,gnorm2,gd,ginner,dnorm; 948 PetscReal gnorm2_old,f_old,resnorm, gnorm_old; 949 PetscBool cg_restart, gd_fallback = PETSC_FALSE; 950 951 PetscFunctionBegin; 952 /* We are now going to perform a line search along the direction. */ 953 ++tao->niter; 954 955 /* Store solution and gradient info before it changes */ 956 ierr = VecCopy(tao->solution, cg->X_old);CHKERRQ(ierr); 957 ierr = VecCopy(tao->gradient, cg->G_old);CHKERRQ(ierr); 958 ierr = VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old);CHKERRQ(ierr); 959 960 gnorm_old = gnorm; 961 gnorm2_old = gnorm_old*gnorm_old; 962 f_old = cg->f; 963 /* Perform bounded line search */ 964 ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr); 965 ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr); 966 ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); 967 968 /* Check linesearch failure */ 969 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 970 ++cg->ls_fails; 971 972 if (cg->cg_type == CG_GradientDescent || gd_fallback){ 973 /* Nothing left to do but fail out of the optimization */ 974 step = 0.0; 975 tao->reason = TAO_DIVERGED_LS_FAILURE; 976 } else { 977 /* Restore previous point */ 978 ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr); 979 ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr); 980 ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr); 981 982 gnorm = gnorm_old; 983 gnorm2 = gnorm2_old; 984 cg->f = f_old; 985 986 /* Fall back on the scaled gradient step */ 987 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 988 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 989 990 ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr); 991 ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr); 992 ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); 993 994 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){ 995 /* Nothing left to do but fail out of the optimization */ 996 ++cg->ls_fails; 997 step = 0.0; 998 tao->reason = TAO_DIVERGED_LS_FAILURE; 999 } 1000 } 1001 } 1002 /* Convergence test for line search failure */ 1003 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1004 1005 /* Standard convergence test */ 1006 /* Make sure convergence test is the same. */ 1007 ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr); 1008 ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr); 1009 if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); 1010 ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr); 1011 ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr); 1012 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 1013 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1014 1015 /* Assert we have an updated step and we need at least one more iteration. */ 1016 /* Calculate the next direction */ 1017 /* Estimate the active set at the new solution */ 1018 ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr); 1019 /* Compute the projected gradient and its norm */ 1020 ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr); 1021 ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr); 1022 ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); 1023 gnorm2 = gnorm*gnorm; 1024 1025 /* Check restart conditions for using steepest descent */ 1026 cg_restart = 0; 1027 ierr = VecDot(tao->gradient, cg->G_old, &ginner);CHKERRQ(ierr); 1028 ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr); 1029 1030 ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, cg_restart, dnorm, ginner);CHKERRQ(ierr); 1031 1032 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 1033 1034 if (cg->cg_type != CG_GradientDescent) { 1035 /* Figure out which previously active variables became inactive this iteration */ 1036 ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr); 1037 if (cg->inactive_idx && cg->inactive_old) { 1038 ierr = ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives);CHKERRQ(ierr); 1039 } 1040 /* Selectively reset the CG step those freshly inactive variables */ 1041 if (cg->new_inactives) { 1042 ierr = VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr); 1043 ierr = VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr); 1044 ierr = VecCopy(cg->inactive_grad, cg->inactive_step);CHKERRQ(ierr); 1045 ierr = VecScale(cg->inactive_step, -1.0);CHKERRQ(ierr); 1046 ierr = VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr); 1047 ierr = VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr); 1048 } 1049 1050 /* Verify that this is a descent direction */ 1051 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 1052 ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr); 1053 if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) { 1054 /* Not a descent direction, so we reset back to projected gradient descent */ 1055 PetscPrintf(PETSC_COMM_SELF, "gtd/(dtd) is small or positive: %20.19e\n", gd/(dnorm*dnorm)); 1056 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 1057 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 1058 ++cg->descent_error; 1059 gd_fallback = PETSC_TRUE; 1060 } else { 1061 gd_fallback = PETSC_FALSE; 1062 } 1063 } 1064 PetscFunctionReturn(0); 1065 } 1066