1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> 3 4 #define CG_FletcherReeves 0 5 #define CG_PolakRibiere 1 6 #define CG_PolakRibierePlus 2 7 #define CG_HestenesStiefel 3 8 #define CG_DaiYuan 4 9 #define CG_Types 5 10 11 static const char *CG_Table[64] = {"fr", "pr", "prp", "hs", "dy"}; 12 13 #define CG_AS_NONE 0 14 #define CG_AS_BERTSEKAS 1 15 #define CG_AS_SIZE 2 16 17 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 18 19 PetscErrorCode TaoBNCGSetRecycleFlag(Tao tao, PetscBool recycle) 20 { 21 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 22 23 PetscFunctionBegin; 24 cg->recycle = recycle; 25 PetscFunctionReturn(0); 26 } 27 28 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) 29 { 30 PetscErrorCode ierr; 31 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 32 33 PetscFunctionBegin; 34 ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr); 35 if (cg->inactive_idx) { 36 ierr = ISDuplicate(cg->inactive_idx, &cg->inactive_old);CHKERRQ(ierr); 37 ierr = ISCopy(cg->inactive_idx, cg->inactive_old);CHKERRQ(ierr); 38 } 39 switch (asType) { 40 case CG_AS_NONE: 41 ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr); 42 ierr = VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx);CHKERRQ(ierr); 43 ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr); 44 ierr = ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx);CHKERRQ(ierr); 45 break; 46 47 case CG_AS_BERTSEKAS: 48 /* Use gradient descent to estimate the active set */ 49 ierr = VecCopy(cg->unprojected_gradient, cg->W);CHKERRQ(ierr); 50 ierr = VecScale(cg->W, -1.0);CHKERRQ(ierr); 51 ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, 52 &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr); 53 break; 54 55 default: 56 break; 57 } 58 PetscFunctionReturn(0); 59 } 60 61 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step) 62 { 63 PetscErrorCode ierr; 64 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 65 66 PetscFunctionBegin; 67 switch (asType) { 68 case CG_AS_NONE: 69 ierr = VecISSet(step, cg->active_idx, 0.0);CHKERRQ(ierr); 70 break; 71 72 case CG_AS_BERTSEKAS: 73 ierr = TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step);CHKERRQ(ierr); 74 break; 75 76 default: 77 break; 78 } 79 PetscFunctionReturn(0); 80 } 81 82 static PetscErrorCode TaoSolve_BNCG(Tao tao) 83 { 84 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 85 PetscErrorCode ierr; 86 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 87 PetscReal step=1.0,gnorm,gnorm2,gd,ginner,beta,dnorm; 88 PetscReal gd_old,gnorm2_old,f_old,resnorm; 89 PetscBool cg_restart; 90 PetscInt nDiff; 91 92 PetscFunctionBegin; 93 /* Project the current point onto the feasible set */ 94 ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); 95 ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr); 96 97 /* Project the initial point onto the feasible region */ 98 ierr = TaoBoundSolution(tao->XL,tao->XU,tao->solution, 0.0, &nDiff);CHKERRQ(ierr); 99 100 if (!cg->recycle) { 101 /* Solver is not being recycled so just compute the objective function and criteria */ 102 ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient);CHKERRQ(ierr); 103 } else { 104 /* We are recycling, so we have to compute ||g_old||^2 for use in the CG step calculation */ 105 ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr); 106 } 107 ierr = VecNorm(cg->unprojected_gradient,NORM_2,&gnorm);CHKERRQ(ierr); 108 if (PetscIsInfOrNanReal(cg->f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); 109 110 /* Estimate the active set and compute the projected gradient */ 111 ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr); 112 113 /* Project the gradient and calculate the norm */ 114 ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr); 115 ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr); 116 ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); 117 gnorm2 = gnorm*gnorm; 118 119 /* Convergence check */ 120 tao->niter = 0; 121 tao->reason = TAO_CONTINUE_ITERATING; 122 ierr = TaoLogConvergenceHistory(tao, cg->f, gnorm, 0.0, tao->ksp_its);CHKERRQ(ierr); 123 ierr = TaoMonitor(tao, tao->niter, cg->f, gnorm, 0.0, step);CHKERRQ(ierr); 124 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 125 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 126 127 /* Start optimization iterations */ 128 cg->ls_fails = cg->broken_ortho = cg->descent_error = 0; 129 cg->resets = -1; 130 while (tao->reason == TAO_CONTINUE_ITERATING) { 131 ++tao->niter; 132 133 /* Check restart conditions for using steepest descent */ 134 cg_restart = PETSC_FALSE; 135 ierr = VecDot(tao->gradient, cg->G_old, &ginner);CHKERRQ(ierr); 136 ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr); 137 if (tao->niter == 1 && !cg->recycle && dnorm != 0.0) { 138 /* 1) First iteration, with recycle disabled, and a non-zero previous step */ 139 cg_restart = PETSC_TRUE; 140 } else if (PetscAbsScalar(ginner) >= cg->eta * gnorm2) { 141 /* 2) Gradients are far from orthogonal */ 142 cg_restart = PETSC_TRUE; 143 ++cg->broken_ortho; 144 } 145 146 /* Compute CG step */ 147 if (cg_restart) { 148 beta = 0.0; 149 ++cg->resets; 150 } else { 151 switch (cg->cg_type) { 152 case CG_FletcherReeves: 153 beta = gnorm2 / gnorm2_old; 154 break; 155 156 case CG_PolakRibiere: 157 beta = (gnorm2 - ginner) / gnorm2_old; 158 break; 159 160 case CG_PolakRibierePlus: 161 beta = PetscMax((gnorm2-ginner)/gnorm2_old, 0.0); 162 break; 163 164 case CG_HestenesStiefel: 165 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 166 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 167 beta = (gnorm2 - ginner) / (gd - gd_old); 168 break; 169 170 case CG_DaiYuan: 171 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 172 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 173 beta = gnorm2 / (gd - gd_old); 174 break; 175 176 default: 177 beta = 0.0; 178 break; 179 } 180 } 181 182 /* Compute the direction d=-g + beta*d */ 183 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, tao->gradient);CHKERRQ(ierr); 184 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 185 186 /* Figure out which previously active variables became inactive this iteration */ 187 ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr); 188 if (cg->inactive_idx && cg->inactive_old) { 189 ierr = ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives);CHKERRQ(ierr); 190 } 191 192 /* Selectively reset the CG step those freshly inactive variables */ 193 if (cg->new_inactives) { 194 ierr = VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr); 195 ierr = VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr); 196 ierr = VecCopy(cg->inactive_grad, cg->inactive_step);CHKERRQ(ierr); 197 ierr = VecScale(cg->inactive_step, -1.0);CHKERRQ(ierr); 198 ierr = VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr); 199 ierr = VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr); 200 } 201 202 /* Verify that this is a descent direction */ 203 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 204 ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm); 205 if (gd > -cg->rho*PetscPowReal(dnorm, cg->pow)) { 206 /* Not a descent direction, so we reset back to projected gradient descent */ 207 ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, tao->gradient);CHKERRQ(ierr); 208 ++cg->resets; 209 ++cg->descent_error; 210 } 211 212 /* Store solution and gradient info before it changes */ 213 ierr = VecCopy(tao->solution, cg->X_old);CHKERRQ(ierr); 214 ierr = VecCopy(tao->gradient, cg->G_old);CHKERRQ(ierr); 215 ierr = VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old);CHKERRQ(ierr); 216 gnorm2_old = gnorm2; 217 f_old = cg->f; 218 219 /* Perform bounded line search */ 220 ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr); 221 ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); 222 223 /* Check linesearch failure */ 224 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 225 ++cg->ls_fails; 226 /* Restore previous point */ 227 gnorm2 = gnorm2_old; 228 cg->f = f_old; 229 ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr); 230 ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr); 231 ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr); 232 233 /* Fall back on the gradient descent step */ 234 ierr = VecCopy(tao->gradient, tao->stepdirection);CHKERRQ(ierr); 235 ierr = VecScale(tao->stepdirection, -1.0);CHKERRQ(ierr); 236 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 237 ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr); 238 ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); 239 240 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){ 241 ++cg->ls_fails; 242 /* Restore previous point */ 243 gnorm2 = gnorm2_old; 244 cg->f = f_old; 245 ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr); 246 ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr); 247 ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr); 248 249 /* Nothing left to do but fail out of the optimization */ 250 step = 0.0; 251 tao->reason = TAO_DIVERGED_LS_FAILURE; 252 } 253 } 254 255 if (tao->reason != TAO_DIVERGED_LS_FAILURE) { 256 /* Estimate the active set at the new solution */ 257 ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr); 258 259 /* Compute the projected gradient and its norm */ 260 ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr); 261 ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr); 262 ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); 263 gnorm2 = gnorm*gnorm; 264 } 265 266 /* Convergence test */ 267 ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr); 268 ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr); 269 if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); 270 ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr); 271 ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr); 272 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 273 } 274 PetscFunctionReturn(0); 275 } 276 277 static PetscErrorCode TaoSetUp_BNCG(Tao tao) 278 { 279 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 if (!tao->gradient) { 284 ierr = VecDuplicate(tao->solution,&tao->gradient);CHKERRQ(ierr); 285 } 286 if (!tao->stepdirection) { 287 ierr = VecDuplicate(tao->solution,&tao->stepdirection);CHKERRQ(ierr); 288 } 289 if (!cg->W) { 290 ierr = VecDuplicate(tao->solution,&cg->W);CHKERRQ(ierr); 291 } 292 if (!cg->work) { 293 ierr = VecDuplicate(tao->solution,&cg->work);CHKERRQ(ierr); 294 } 295 if (!cg->X_old) { 296 ierr = VecDuplicate(tao->solution,&cg->X_old);CHKERRQ(ierr); 297 } 298 if (!cg->G_old) { 299 ierr = VecDuplicate(tao->gradient,&cg->G_old);CHKERRQ(ierr); 300 } 301 if (!cg->unprojected_gradient) { 302 ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient);CHKERRQ(ierr); 303 } 304 if (!cg->unprojected_gradient_old) { 305 ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient_old);CHKERRQ(ierr); 306 } 307 PetscFunctionReturn(0); 308 } 309 310 static PetscErrorCode TaoDestroy_BNCG(Tao tao) 311 { 312 TAO_BNCG *cg = (TAO_BNCG*) tao->data; 313 PetscErrorCode ierr; 314 315 PetscFunctionBegin; 316 if (tao->setupcalled) { 317 ierr = VecDestroy(&cg->W);CHKERRQ(ierr); 318 ierr = VecDestroy(&cg->work);CHKERRQ(ierr); 319 ierr = VecDestroy(&cg->X_old);CHKERRQ(ierr); 320 ierr = VecDestroy(&cg->G_old);CHKERRQ(ierr); 321 ierr = VecDestroy(&cg->unprojected_gradient);CHKERRQ(ierr); 322 ierr = VecDestroy(&cg->unprojected_gradient_old);CHKERRQ(ierr); 323 } 324 ierr = PetscFree(tao->data);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao) 329 { 330 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 331 PetscErrorCode ierr; 332 333 PetscFunctionBegin; 334 ierr = TaoLineSearchSetFromOptions(tao->linesearch);CHKERRQ(ierr); 335 ierr = PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization");CHKERRQ(ierr); 336 ierr = PetscOptionsReal("-tao_bncg_eta","restart tolerance", "", cg->eta,&cg->eta,NULL);CHKERRQ(ierr); 337 ierr = PetscOptionsReal("-tao_bncg_rho","descent direction tolerance", "", cg->rho,&cg->rho,NULL);CHKERRQ(ierr); 338 ierr = PetscOptionsReal("-tao_bncg_pow","descent direction exponent", "", cg->pow,&cg->pow,NULL);CHKERRQ(ierr); 339 ierr = PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CG_Types, CG_Table[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr); 340 ierr = PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr); 341 ierr = PetscOptionsBool("-tao_bncg_recycle","enable recycling the existing solution and gradient at the start of a new solve","",cg->recycle,&cg->recycle,NULL);CHKERRQ(ierr); 342 ierr = PetscOptionsReal("-tao_bncg_as_tol", "initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL);CHKERRQ(ierr); 343 ierr = PetscOptionsReal("-tao_bncg_as_step", "step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL);CHKERRQ(ierr); 344 ierr = PetscOptionsTail();CHKERRQ(ierr); 345 PetscFunctionReturn(0); 346 } 347 348 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) 349 { 350 PetscBool isascii; 351 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr); 356 if (isascii) { 357 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 358 ierr = PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]);CHKERRQ(ierr); 359 ierr = PetscViewerASCIIPrintf(viewer, "Resets: %i\n", cg->resets);CHKERRQ(ierr); 360 ierr = PetscViewerASCIIPrintf(viewer, " Broken ortho: %i\n", cg->broken_ortho);CHKERRQ(ierr); 361 ierr = PetscViewerASCIIPrintf(viewer, " Not a descent dir.: %i\n", cg->descent_error);CHKERRQ(ierr); 362 ierr = PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails);CHKERRQ(ierr); 363 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 364 } 365 PetscFunctionReturn(0); 366 } 367 368 /*MC 369 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 370 371 Options Database Keys: 372 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls 373 . -tao_bncg_eta <r> - restart tolerance 374 . -tao_bncg_type <taocg_type> - cg formula 375 . -tao_bncg_as_type <none,bertsekas> - active set estimation method 376 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation 377 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation 378 379 Notes: 380 CG formulas are: 381 "fr" - Fletcher-Reeves 382 "pr" - Polak-Ribiere 383 "prp" - Polak-Ribiere-Plus 384 "hs" - Hestenes-Steifel 385 "dy" - Dai-Yuan 386 Level: beginner 387 M*/ 388 389 390 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 391 { 392 TAO_BNCG *cg; 393 const char *morethuente_type = TAOLINESEARCHMT; 394 PetscErrorCode ierr; 395 396 PetscFunctionBegin; 397 tao->ops->setup = TaoSetUp_BNCG; 398 tao->ops->solve = TaoSolve_BNCG; 399 tao->ops->view = TaoView_BNCG; 400 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 401 tao->ops->destroy = TaoDestroy_BNCG; 402 403 /* Override default settings (unless already changed) */ 404 if (!tao->max_it_changed) tao->max_it = 2000; 405 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 406 407 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 408 /* method. In particular, gtol should be less that 0.5; the value used in */ 409 /* Nocedal and Wright is 0.10. We use the default values for the */ 410 /* linesearch because it seems to work better. */ 411 ierr = TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch);CHKERRQ(ierr); 412 ierr = PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1);CHKERRQ(ierr); 413 ierr = TaoLineSearchSetType(tao->linesearch, morethuente_type);CHKERRQ(ierr); 414 ierr = TaoLineSearchUseTaoRoutines(tao->linesearch, tao);CHKERRQ(ierr); 415 ierr = TaoLineSearchSetOptionsPrefix(tao->linesearch,tao->hdr.prefix);CHKERRQ(ierr); 416 417 ierr = PetscNewLog(tao,&cg);CHKERRQ(ierr); 418 tao->data = (void*)cg; 419 cg->rho = 1e-4; 420 cg->pow = 2.1; 421 cg->eta = 0.5; 422 cg->as_step = 0.001; 423 cg->as_tol = 0.001; 424 cg->as_type = CG_AS_BERTSEKAS; 425 cg->cg_type = CG_DaiYuan; 426 cg->recycle = PETSC_FALSE; 427 PetscFunctionReturn(0); 428 } 429