1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> 3 4 #define CG_FletcherReeves 0 5 #define CG_PolakRibiere 1 6 #define CG_PolakRibierePlus 2 7 #define CG_HestenesStiefel 3 8 #define CG_DaiYuan 4 9 #define CG_Types 5 10 11 static const char *CG_Table[64] = {"fr", "pr", "prp", "hs", "dy"}; 12 13 #define CG_AS_NONE 0 14 #define CG_AS_BERTSEKAS 1 15 #define CG_AS_SIZE 2 16 17 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 18 19 PetscErrorCode TaoBNCGSetRecycleFlag(Tao tao, PetscBool recycle) 20 { 21 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 22 23 PetscFunctionBegin; 24 cg->recycle = recycle; 25 PetscFunctionReturn(0); 26 } 27 28 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) 29 { 30 PetscErrorCode ierr; 31 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 32 33 PetscFunctionBegin; 34 ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr); 35 if (cg->inactive_idx) { 36 ierr = ISDuplicate(cg->inactive_idx, &cg->inactive_old);CHKERRQ(ierr); 37 ierr = ISCopy(cg->inactive_idx, cg->inactive_old);CHKERRQ(ierr); 38 } 39 switch (asType) { 40 case CG_AS_NONE: 41 ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr); 42 ierr = VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx);CHKERRQ(ierr); 43 ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr); 44 ierr = ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx);CHKERRQ(ierr); 45 break; 46 47 case CG_AS_BERTSEKAS: 48 /* Use gradient descent to estimate the active set */ 49 ierr = VecCopy(cg->unprojected_gradient, cg->W);CHKERRQ(ierr); 50 ierr = VecScale(cg->W, -1.0);CHKERRQ(ierr); 51 ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->as_step, &cg->as_tol, &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr); 52 53 default: 54 break; 55 } 56 PetscFunctionReturn(0); 57 } 58 59 PetscErrorCode TaoBNCGBoundStep(Tao tao, Vec step) 60 { 61 PetscErrorCode ierr; 62 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 63 64 PetscFunctionBegin; 65 switch (cg->as_type) { 66 case CG_AS_NONE: 67 if (cg->active_idx) {ierr = VecISSet(step, cg->active_idx, 0.0);CHKERRQ(ierr);} 68 break; 69 70 case CG_AS_BERTSEKAS: 71 ierr = TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, step);CHKERRQ(ierr); 72 break; 73 74 default: 75 break; 76 } 77 PetscFunctionReturn(0); 78 } 79 80 static PetscErrorCode TaoSolve_BNCG(Tao tao) 81 { 82 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 83 PetscErrorCode ierr; 84 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 85 PetscReal step=1.0,gnorm,gnorm2,delta,gd,ginner,beta,dnorm,resnorm; 86 PetscReal gd_old,gnorm2_old,f_old; 87 PetscBool cg_restart; 88 89 PetscFunctionBegin; 90 /* Project the current point onto the feasible set */ 91 ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); 92 ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr); 93 94 /* Project the initial point onto the feasible region */ 95 ierr = VecMedian(tao->XL,tao->solution,tao->XU,tao->solution);CHKERRQ(ierr); 96 97 /* Compute the objective function and criteria */ 98 if (!cg->recycle) { 99 ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient);CHKERRQ(ierr); 100 } 101 ierr = VecNorm(cg->unprojected_gradient,NORM_2,&gnorm);CHKERRQ(ierr); 102 if (PetscIsInfOrNanReal(cg->f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); 103 104 /* Estimate the active set and compute the projected gradient */ 105 ierr = VecCopy(cg->unprojected_gradient, cg->W);CHKERRQ(ierr); 106 ierr = VecScale(cg->W, -1.0);CHKERRQ(ierr); 107 ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr); 108 109 /* Project the gradient and calculate the norm */ 110 ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr); 111 ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr); 112 ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); 113 gnorm2 = gnorm*gnorm; 114 115 /* Convergence check */ 116 tao->niter = 0; 117 tao->reason = TAO_CONTINUE_ITERATING; 118 ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr); 119 ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr); 120 ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr); 121 ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr); 122 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 123 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 124 125 /* Start optimization iterations */ 126 cg->ls_fails = cg->broken_ortho = cg->descent_error = 0; 127 cg->resets = -1; 128 while (tao->reason == TAO_CONTINUE_ITERATING) { 129 /* Check restart conditions for using steepest descent */ 130 cg_restart = PETSC_FALSE; 131 ierr = VecDot(tao->gradient, cg->G_old, &ginner);CHKERRQ(ierr); 132 ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr); 133 if (tao->niter == 0 && !cg->recycle && dnorm != 0.0) { 134 /* 1) First iteration, with recycle disabled, and a non-zero previous step */ 135 cg_restart = PETSC_TRUE; 136 } else if (PetscAbsScalar(ginner) >= cg->eta * gnorm2) { 137 /* 2) Gradients are far from orthogonal */ 138 cg_restart = PETSC_TRUE; 139 cg->broken_ortho++; 140 } 141 142 /* Compute CG step */ 143 if (cg_restart) { 144 beta = 0.0; 145 cg->resets++; 146 } else { 147 switch (cg->cg_type) { 148 case CG_FletcherReeves: 149 beta = gnorm2 / gnorm2_old; 150 break; 151 152 case CG_PolakRibiere: 153 beta = (gnorm2 - ginner) / gnorm2_old; 154 break; 155 156 case CG_PolakRibierePlus: 157 beta = PetscMax((gnorm2-ginner)/gnorm2_old, 0.0); 158 break; 159 160 case CG_HestenesStiefel: 161 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 162 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 163 beta = (gnorm2 - ginner) / (gd - gd_old); 164 break; 165 166 case CG_DaiYuan: 167 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 168 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 169 beta = gnorm2 / (gd - gd_old); 170 break; 171 172 default: 173 beta = 0.0; 174 break; 175 } 176 } 177 178 /* Compute the direction d=-g + beta*d */ 179 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, tao->gradient);CHKERRQ(ierr); 180 ierr = TaoBNCGBoundStep(tao, tao->stepdirection);CHKERRQ(ierr); 181 if (cg->inactive_old) { 182 ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr); 183 ierr = ISDifference(cg->inactive_old, cg->inactive_idx, &cg->new_inactives); 184 ierr = VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr); 185 ierr = VecGetSubVector(tao->gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr); 186 ierr = VecCopy(cg->inactive_grad, cg->inactive_step);CHKERRQ(ierr); 187 ierr = VecScale(cg->inactive_step, -1.0);CHKERRQ(ierr); 188 ierr = VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr); 189 ierr = VecRestoreSubVector(tao->gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr); 190 } 191 192 /* Verify that this is a descent direction */ 193 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 194 ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm); 195 if (gd > -cg->rho*PetscPowReal(dnorm, cg->pow)) { 196 /* Not a descent direction, so we reset back to projected gradient descent */ 197 ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, tao->gradient);CHKERRQ(ierr); 198 cg->resets++; 199 cg->descent_error++; 200 } 201 202 /* update initial steplength choice */ 203 delta = 1.0; 204 delta = PetscMax(delta, cg->delta_min); 205 delta = PetscMin(delta, cg->delta_max); 206 207 /* Store solution and gradient info before it changes */ 208 ierr = VecCopy(tao->solution, cg->X_old);CHKERRQ(ierr); 209 ierr = VecCopy(tao->gradient, cg->G_old);CHKERRQ(ierr); 210 ierr = VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old);CHKERRQ(ierr); 211 gnorm2_old = gnorm2; 212 f_old = cg->f; 213 214 /* Perform bounded line search */ 215 ierr = TaoLineSearchSetInitialStepLength(tao->linesearch,delta);CHKERRQ(ierr); 216 ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr); 217 ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); 218 219 /* Check linesearch failure */ 220 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 221 cg->ls_fails++; 222 /* Restore previous point */ 223 gnorm2 = gnorm2_old; 224 cg->f = f_old; 225 ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr); 226 ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr); 227 ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr); 228 229 /* Fall back on the unscaled gradient step */ 230 delta = 1.0; 231 ierr = VecCopy(tao->gradient, tao->stepdirection);CHKERRQ(ierr); 232 ierr = VecScale(tao->stepdirection, -1.0);CHKERRQ(ierr); 233 ierr = TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, tao->stepdirection);CHKERRQ(ierr); 234 235 ierr = TaoLineSearchSetInitialStepLength(tao->linesearch,delta);CHKERRQ(ierr); 236 ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr); 237 ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); 238 239 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){ 240 cg->ls_fails++; 241 /* Restore previous point */ 242 gnorm2 = gnorm2_old; 243 cg->f = f_old; 244 ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr); 245 ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr); 246 ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr); 247 248 /* Nothing left to do but fail out of the optimization */ 249 step = 0.0; 250 tao->reason = TAO_DIVERGED_LS_FAILURE; 251 } 252 } 253 254 /* Estimate the active set at the new solution */ 255 ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr); 256 257 /* Compute the projected gradient and its norm */ 258 ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr); 259 ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr); 260 ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); 261 gnorm2 = gnorm*gnorm; 262 263 /* Convergence test */ 264 tao->niter++; 265 ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr); 266 ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr); 267 ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr); 268 ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr); 269 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 270 } 271 PetscFunctionReturn(0); 272 } 273 274 static PetscErrorCode TaoSetUp_BNCG(Tao tao) 275 { 276 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 if (!tao->gradient) {ierr = VecDuplicate(tao->solution,&tao->gradient);CHKERRQ(ierr);} 281 if (!tao->stepdirection) {ierr = VecDuplicate(tao->solution,&tao->stepdirection);CHKERRQ(ierr);} 282 if (!cg->W) {ierr = VecDuplicate(tao->solution,&cg->W);CHKERRQ(ierr);} 283 if (!cg->X_old) {ierr = VecDuplicate(tao->solution,&cg->X_old);CHKERRQ(ierr);} 284 if (!cg->G_old) {ierr = VecDuplicate(tao->gradient,&cg->G_old);CHKERRQ(ierr);} 285 if (!cg->unprojected_gradient) {ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient);CHKERRQ(ierr);} 286 if (!cg->unprojected_gradient_old) {ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient_old);CHKERRQ(ierr);} 287 PetscFunctionReturn(0); 288 } 289 290 static PetscErrorCode TaoDestroy_BNCG(Tao tao) 291 { 292 TAO_BNCG *cg = (TAO_BNCG*) tao->data; 293 PetscErrorCode ierr; 294 295 PetscFunctionBegin; 296 if (tao->setupcalled) { 297 ierr = VecDestroy(&cg->W);CHKERRQ(ierr); 298 ierr = VecDestroy(&cg->X_old);CHKERRQ(ierr); 299 ierr = VecDestroy(&cg->G_old);CHKERRQ(ierr); 300 ierr = VecDestroy(&cg->unprojected_gradient);CHKERRQ(ierr); 301 ierr = VecDestroy(&cg->unprojected_gradient_old);CHKERRQ(ierr); 302 } 303 ierr = PetscFree(tao->data);CHKERRQ(ierr); 304 PetscFunctionReturn(0); 305 } 306 307 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao) 308 { 309 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 310 PetscErrorCode ierr; 311 312 PetscFunctionBegin; 313 ierr = TaoLineSearchSetFromOptions(tao->linesearch);CHKERRQ(ierr); 314 ierr = PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization");CHKERRQ(ierr); 315 ierr = PetscOptionsReal("-tao_bncg_eta","restart tolerance", "", cg->eta,&cg->eta,NULL);CHKERRQ(ierr); 316 ierr = PetscOptionsReal("-tao_bncg_rho","descent direction tolerance", "", cg->rho,&cg->rho,NULL);CHKERRQ(ierr); 317 ierr = PetscOptionsReal("-tao_bncg_pow","descent direction exponent", "", cg->pow,&cg->pow,NULL);CHKERRQ(ierr); 318 ierr = PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CG_Types, CG_Table[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr); 319 ierr = PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr); 320 ierr = PetscOptionsReal("-tao_bncg_delta_min","minimum delta value", "", cg->delta_min,&cg->delta_min,NULL);CHKERRQ(ierr); 321 ierr = PetscOptionsReal("-tao_bncg_delta_max","maximum delta value", "", cg->delta_max,&cg->delta_max,NULL);CHKERRQ(ierr); 322 ierr = PetscOptionsBool("-tao_bncg_recycle","enable recycling the existing solution and gradient at the start of a new solve","",cg->recycle,&cg->recycle,NULL);CHKERRQ(ierr); 323 ierr = PetscOptionsReal("-tao_bncg_as_tol", "initial tolerance used when estimating actively bounded variables", "", cg->as_tol, &cg->as_tol,NULL);CHKERRQ(ierr); 324 ierr = PetscOptionsReal("-tao_bncg_as_step", "step length used when estimating actively bounded variables", "", cg->as_step, &cg->as_step,NULL);CHKERRQ(ierr); 325 ierr = PetscOptionsTail();CHKERRQ(ierr); 326 PetscFunctionReturn(0); 327 } 328 329 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) 330 { 331 PetscBool isascii; 332 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 333 PetscErrorCode ierr; 334 335 PetscFunctionBegin; 336 ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr); 337 if (isascii) { 338 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 339 ierr = PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]);CHKERRQ(ierr); 340 ierr = PetscViewerASCIIPrintf(viewer, "Resets: %i\n", cg->resets);CHKERRQ(ierr); 341 ierr = PetscViewerASCIIPrintf(viewer, " Broken ortho: %i\n", cg->broken_ortho);CHKERRQ(ierr); 342 ierr = PetscViewerASCIIPrintf(viewer, " Not a descent dir.: %i\n", cg->descent_error);CHKERRQ(ierr); 343 ierr = PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails);CHKERRQ(ierr); 344 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 345 } 346 PetscFunctionReturn(0); 347 } 348 349 /*MC 350 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 351 352 Options Database Keys: 353 + -tao_bncg_eta <r> - restart tolerance 354 . -tao_bncg_type <taocg_type> - cg formula 355 . -tao_bncg_delta_min <r> - minimum delta value 356 - -tao_bncg_delta_max <r> - maximum delta value 357 358 Notes: 359 CG formulas are: 360 "fr" - Fletcher-Reeves 361 "pr" - Polak-Ribiere 362 "prp" - Polak-Ribiere-Plus 363 "hs" - Hestenes-Steifel 364 "dy" - Dai-Yuan 365 Level: beginner 366 M*/ 367 368 369 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 370 { 371 TAO_BNCG *cg; 372 const char *morethuente_type = TAOLINESEARCHMT; 373 PetscErrorCode ierr; 374 375 PetscFunctionBegin; 376 tao->ops->setup = TaoSetUp_BNCG; 377 tao->ops->solve = TaoSolve_BNCG; 378 tao->ops->view = TaoView_BNCG; 379 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 380 tao->ops->destroy = TaoDestroy_BNCG; 381 382 /* Override default settings (unless already changed) */ 383 if (!tao->max_it_changed) tao->max_it = 2000; 384 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 385 386 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 387 /* method. In particular, gtol should be less that 0.5; the value used in */ 388 /* Nocedal and Wright is 0.10. We use the default values for the */ 389 /* linesearch because it seems to work better. */ 390 ierr = TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch);CHKERRQ(ierr); 391 ierr = PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1);CHKERRQ(ierr); 392 ierr = TaoLineSearchSetType(tao->linesearch, morethuente_type);CHKERRQ(ierr); 393 ierr = TaoLineSearchUseTaoRoutines(tao->linesearch, tao);CHKERRQ(ierr); 394 ierr = TaoLineSearchSetOptionsPrefix(tao->linesearch,tao->hdr.prefix);CHKERRQ(ierr); 395 396 ierr = PetscNewLog(tao,&cg);CHKERRQ(ierr); 397 tao->data = (void*)cg; 398 cg->rho = 1e-4; 399 cg->pow = 2.1; 400 cg->eta = 0.5; 401 cg->delta_min = 1e-7; 402 cg->delta_max = 100; 403 cg->as_step = 0.001; 404 cg->as_tol = 0.001; 405 cg->as_type = CG_AS_BERTSEKAS; 406 cg->cg_type = CG_DaiYuan; 407 cg->recycle = PETSC_FALSE; 408 PetscFunctionReturn(0); 409 } 410