1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> 3 4 #define CG_GradientDescent 0 5 #define CG_HestenesStiefel 1 6 #define CG_FletcherReeves 2 7 #define CG_PolakRibiere 3 8 #define CG_PolakRibierePlus 4 9 #define CG_DaiYuan 5 10 #define CG_HagerZhang 6 11 #define CG_DaiKou 7 12 #define CG_KouDai 8 13 #define CG_SSML_BFGS 9 14 #define CG_SSML_DFP 10 15 #define CG_SSML_BROYDEN 11 16 #define CG_BFGS_Mod 12 17 #define CG_Types 13 18 19 static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "ssml_bfgs_mod"}; 20 21 #define CG_AS_NONE 0 22 #define CG_AS_BERTSEKAS 1 23 #define CG_AS_SIZE 2 24 25 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 26 27 PetscErrorCode TaoBNCGSetRecycleFlag(Tao tao, PetscBool recycle) 28 { 29 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 30 31 PetscFunctionBegin; 32 cg->recycle = recycle; 33 PetscFunctionReturn(0); 34 } 35 36 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) 37 { 38 PetscErrorCode ierr; 39 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 40 41 PetscFunctionBegin; 42 ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr); 43 if (cg->inactive_idx) { 44 ierr = ISDuplicate(cg->inactive_idx, &cg->inactive_old);CHKERRQ(ierr); 45 ierr = ISCopy(cg->inactive_idx, cg->inactive_old);CHKERRQ(ierr); 46 } 47 switch (asType) { 48 case CG_AS_NONE: 49 ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr); 50 ierr = VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx);CHKERRQ(ierr); 51 ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr); 52 ierr = ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx);CHKERRQ(ierr); 53 break; 54 55 case CG_AS_BERTSEKAS: 56 /* Use gradient descent to estimate the active set */ 57 ierr = VecCopy(cg->unprojected_gradient, cg->W);CHKERRQ(ierr); 58 ierr = VecScale(cg->W, -1.0);CHKERRQ(ierr); 59 ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, 60 &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr); 61 break; 62 63 default: 64 break; 65 } 66 PetscFunctionReturn(0); 67 } 68 69 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step) 70 { 71 PetscErrorCode ierr; 72 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 73 74 PetscFunctionBegin; 75 switch (asType) { 76 case CG_AS_NONE: 77 ierr = VecISSet(step, cg->active_idx, 0.0);CHKERRQ(ierr); 78 break; 79 80 case CG_AS_BERTSEKAS: 81 ierr = TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step);CHKERRQ(ierr); 82 break; 83 84 default: 85 break; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 static PetscErrorCode TaoSolve_BNCG(Tao tao) 91 { 92 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 93 PetscErrorCode ierr; 94 PetscReal step=1.0,gnorm,gnorm2; 95 PetscInt nDiff; 96 97 PetscFunctionBegin; 98 /* Project the current point onto the feasible set */ 99 ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); 100 ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr); 101 102 /* Project the initial point onto the feasible region */ 103 ierr = TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution);CHKERRQ(ierr); 104 ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient);CHKERRQ(ierr); 105 ierr = VecNorm(cg->unprojected_gradient,NORM_2,&gnorm);CHKERRQ(ierr); 106 if (PetscIsInfOrNanReal(cg->f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); 107 108 /* Estimate the active set and compute the projected gradient */ 109 ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr); 110 111 /* Project the gradient and calculate the norm */ 112 ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr); 113 ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr); 114 ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); 115 gnorm2 = gnorm*gnorm; 116 117 /* Initialize counters */ 118 tao->niter = 0; 119 cg->ls_fails = cg->broken_ortho = cg->descent_error = 0; 120 cg->resets = -1; 121 cg->iter_quad = 0; 122 123 /* Convergence test at the starting point. */ 124 tao->reason = TAO_CONTINUE_ITERATING; 125 ierr = TaoLogConvergenceHistory(tao, cg->f, gnorm, 0.0, tao->ksp_its);CHKERRQ(ierr); 126 ierr = TaoMonitor(tao, tao->niter, cg->f, gnorm, 0.0, step);CHKERRQ(ierr); 127 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 128 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 129 130 /* Assert that we have not converged. Calculate initial direction. */ 131 /* This is where recycling goes. The outcome of this code must be */ 132 /* the direction that we will use. */ 133 if (cg->recycle) { 134 } 135 /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */ 136 ierr = TaoBNCGResetUpdate(tao, gnorm2); 137 138 while(1) { 139 ierr = TaoBNCGConductIteration(tao, gnorm); CHKERRQ(ierr); 140 if (cg->use_steffenson) ierr = TaoBNCGSteffensonAcceleration(tao); CHKERRQ(ierr); 141 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 142 } 143 PetscFunctionReturn(0); 144 } 145 146 static PetscErrorCode TaoSetUp_BNCG(Tao tao) 147 { 148 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 149 PetscErrorCode ierr; 150 151 PetscFunctionBegin; 152 if (!tao->gradient) { 153 ierr = VecDuplicate(tao->solution,&tao->gradient);CHKERRQ(ierr); 154 } 155 if (!tao->stepdirection) { 156 ierr = VecDuplicate(tao->solution,&tao->stepdirection);CHKERRQ(ierr); 157 } 158 if (!cg->W) { 159 ierr = VecDuplicate(tao->solution,&cg->W);CHKERRQ(ierr); 160 } 161 if (!cg->work) { 162 ierr = VecDuplicate(tao->solution,&cg->work);CHKERRQ(ierr); 163 } 164 if (!cg->sk) { 165 ierr = VecDuplicate(tao->solution,&cg->sk);CHKERRQ(ierr); 166 } 167 if (!cg->yk) { 168 ierr = VecDuplicate(tao->gradient,&cg->yk);CHKERRQ(ierr); 169 } 170 if (!cg->X_old) { 171 ierr = VecDuplicate(tao->solution,&cg->X_old);CHKERRQ(ierr); 172 } 173 if (!cg->G_old) { 174 ierr = VecDuplicate(tao->gradient,&cg->G_old);CHKERRQ(ierr); 175 ierr = VecDuplicate(tao->solution,&cg->g_work);CHKERRQ(ierr); 176 } 177 if (cg->use_steffenson){ 178 if (!cg->steffnm1) ierr = VecDuplicate(tao->solution, &cg->steffnm1); CHKERRQ(ierr); 179 if (!cg->steffn) ierr = VecDuplicate(tao->solution, &cg->steffn); CHKERRQ(ierr); 180 if (!cg->steffnp1) ierr = VecDuplicate(tao->solution, &cg->steffnp1); CHKERRQ(ierr); 181 if (!cg->steffva) ierr = VecDuplicate(tao->solution, &cg->steffva); CHKERRQ(ierr); 182 if (!cg->steffvatmp) ierr = VecDuplicate(tao->solution, &cg->steffvatmp); CHKERRQ(ierr); 183 } 184 if (cg->diag_scaling){ 185 ierr = VecDuplicate(tao->gradient,&cg->invD);CHKERRQ(ierr); 186 ierr = VecDuplicate(tao->gradient,&cg->invDnew);CHKERRQ(ierr); 187 ierr = VecSet(cg->invDnew, 1.0);CHKERRQ(ierr); 188 ierr = VecDuplicate(tao->gradient,&cg->bfgs_work);CHKERRQ(ierr); 189 ierr = VecDuplicate(tao->gradient,&cg->dfp_work);CHKERRQ(ierr); 190 ierr = VecDuplicate(tao->gradient,&cg->U);CHKERRQ(ierr); 191 ierr = VecDuplicate(tao->gradient,&cg->V);CHKERRQ(ierr); 192 ierr = VecDuplicate(tao->gradient,&cg->W);CHKERRQ(ierr); 193 ierr = VecDuplicate(tao->solution,&cg->d_work);CHKERRQ(ierr); 194 ierr = VecDuplicate(tao->solution,&cg->y_work);CHKERRQ(ierr); 195 196 } 197 if (!cg->unprojected_gradient) { 198 ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient);CHKERRQ(ierr); 199 } 200 if (!cg->unprojected_gradient_old) { 201 ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient_old);CHKERRQ(ierr); 202 } 203 PetscFunctionReturn(0); 204 } 205 206 static PetscErrorCode TaoDestroy_BNCG(Tao tao) 207 { 208 TAO_BNCG *cg = (TAO_BNCG*) tao->data; 209 PetscErrorCode ierr; 210 211 PetscFunctionBegin; 212 if (tao->setupcalled) { 213 ierr = VecDestroy(&cg->W);CHKERRQ(ierr); 214 ierr = VecDestroy(&cg->work);CHKERRQ(ierr); 215 ierr = VecDestroy(&cg->X_old);CHKERRQ(ierr); 216 ierr = VecDestroy(&cg->G_old);CHKERRQ(ierr); 217 ierr = VecDestroy(&cg->unprojected_gradient);CHKERRQ(ierr); 218 ierr = VecDestroy(&cg->unprojected_gradient_old);CHKERRQ(ierr); 219 } 220 ierr = ISDestroy(&cg->active_lower);CHKERRQ(ierr); 221 ierr = ISDestroy(&cg->active_upper);CHKERRQ(ierr); 222 ierr = ISDestroy(&cg->active_fixed);CHKERRQ(ierr); 223 ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr); 224 ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr); 225 ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr); 226 ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr); 227 ierr = PetscFree(tao->data);CHKERRQ(ierr); 228 PetscFunctionReturn(0); 229 } 230 231 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao) 232 { 233 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 234 PetscErrorCode ierr; 235 236 PetscFunctionBegin; 237 ierr = TaoLineSearchSetFromOptions(tao->linesearch);CHKERRQ(ierr); 238 ierr = PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization");CHKERRQ(ierr); 239 ierr = PetscOptionsReal("-tao_bncg_eta","cutoff tolerance for HZ", "", cg->eta,&cg->eta,NULL);CHKERRQ(ierr); 240 ierr = PetscOptionsReal("-tao_bncg_xi","Parameter in KD, HZ, and DK methods", "", cg->xi,&cg->xi,NULL);CHKERRQ(ierr); 241 ierr = PetscOptionsReal("-tao_bncg_gamma", "stabilization term for multiple CG methods", "", cg->gamma, &cg->gamma, NULL); CHKERRQ(ierr); 242 ierr = PetscOptionsReal("-tao_bncg_theta", "update parameter for some CG methods", "", cg->theta, &cg->theta, NULL); CHKERRQ(ierr); 243 ierr = PetscOptionsReal("-tao_bncg_hz_theta", "parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL); CHKERRQ(ierr); 244 ierr = PetscOptionsReal("-tao_bncg_rho","(developer) update limiter in the J0 scaling","",cg->rho,&cg->rho,NULL);CHKERRQ(ierr); 245 ierr = PetscOptionsReal("-tao_bncg_alpha","(developer) convex ratio in the J0 scaling","",cg->alpha,&cg->alpha,NULL);CHKERRQ(ierr); 246 ierr = PetscOptionsReal("-tao_bncg_beta","(developer) exponential factor in the diagonal J0 scaling","",cg->beta,&cg->beta,NULL);CHKERRQ(ierr); 247 ierr = PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL); CHKERRQ(ierr); 248 ierr = PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL); CHKERRQ(ierr); 249 ierr = PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL);CHKERRQ(ierr); 250 ierr = PetscOptionsBool("-tao_bncg_inv_sig","(developer) test parameter to invert the sigma scaling","",cg->inv_sig,&cg->inv_sig,NULL);CHKERRQ(ierr); 251 ierr = PetscOptionsBool("-tao_bncg_dynamic_restart","use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL);CHKERRQ(ierr); 252 ierr = PetscOptionsBool("-tao_bncg_use_steffenson","(incomplete) use vector-Steffenson acceleration","",cg->use_steffenson,&cg->use_steffenson,NULL);CHKERRQ(ierr); 253 ierr = PetscOptionsReal("-tao_bncg_zeta", "Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL); CHKERRQ(ierr); 254 ierr = PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL); CHKERRQ(ierr); 255 ierr = PetscOptionsInt("-tao_bncg_min_restart_num", "Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL); CHKERRQ(ierr); 256 ierr = PetscOptionsReal("-tao_bncg_rho","(developer) descent direction tolerance", "", cg->rho,&cg->rho,NULL);CHKERRQ(ierr); 257 ierr = PetscOptionsReal("-tao_bncg_pow","(developer) descent direction exponent", "", cg->pow,&cg->pow,NULL);CHKERRQ(ierr); 258 ierr = PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CG_Types, CG_Table[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr); 259 ierr = PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr); 260 ierr = PetscOptionsBool("-tao_bncg_recycle","enable recycling the existing solution and gradient at the start of a new solve","",cg->recycle,&cg->recycle,NULL);CHKERRQ(ierr); 261 ierr = PetscOptionsBool("-tao_bncg_spaced_restart","Enable regular steepest descent restarting every ixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL);CHKERRQ(ierr); 262 ierr = PetscOptionsBool("-tao_bncg_neg_xi","(Developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL);CHKERRQ(ierr); 263 ierr = PetscOptionsReal("-tao_bncg_as_tol", "initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL);CHKERRQ(ierr); 264 ierr = PetscOptionsReal("-tao_bncg_as_step", "step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL);CHKERRQ(ierr); 265 ierr = PetscOptionsTail();CHKERRQ(ierr); 266 PetscFunctionReturn(0); 267 } 268 269 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) 270 { 271 PetscBool isascii; 272 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 273 PetscErrorCode ierr; 274 275 PetscFunctionBegin; 276 ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr); 277 if (isascii) { 278 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 279 ierr = PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]);CHKERRQ(ierr); 280 ierr = PetscViewerASCIIPrintf(viewer, "Resets: %i\n", cg->resets);CHKERRQ(ierr); 281 ierr = PetscViewerASCIIPrintf(viewer, " Broken ortho: %i\n", cg->broken_ortho);CHKERRQ(ierr); 282 ierr = PetscViewerASCIIPrintf(viewer, " Not a descent dir.: %i\n", cg->descent_error);CHKERRQ(ierr); 283 ierr = PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails);CHKERRQ(ierr); 284 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 285 } 286 PetscFunctionReturn(0); 287 } 288 289 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha) 290 { 291 PetscReal a, b, c, sig1, sig2; 292 293 PetscFunctionBegin; 294 *scale = 0.0; 295 296 if (alpha == 1.0){ 297 *scale = yts/yty; 298 } else if (alpha == 0.5) { 299 *scale = sts/yty; 300 } 301 else if (alpha == 0.0){ 302 *scale = sts/yts; 303 } 304 else if (alpha == -1.0) *scale = 1.0; 305 else { 306 a = yty; 307 b = yts; 308 c = sts; 309 a *= alpha; 310 b *= -(2.0*alpha - 1.0); 311 c *= alpha - 1.0; 312 sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 313 sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 314 /* accept the positive root as the scalar */ 315 if (sig1 > 0.0) { 316 *scale = sig1; 317 } else if (sig2 > 0.0) { 318 *scale = sig2; 319 } else { 320 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar"); 321 } 322 } 323 PetscFunctionReturn(0); 324 } 325 326 /*MC 327 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 328 329 Options Database Keys: 330 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls 331 . -tao_bncg_eta <r> - restart tolerance 332 . -tao_bncg_type <taocg_type> - cg formula 333 . -tao_bncg_as_type <none,bertsekas> - active set estimation method 334 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation 335 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation 336 337 Notes: 338 CG formulas are: 339 "fr" - Fletcher-Reeves 340 "pr" - Polak-Ribiere 341 "prp" - Polak-Ribiere-Plus 342 "hs" - Hestenes-Steifel 343 "dy" - Dai-Yuan 344 "gd" - Gradient Descent 345 Level: beginner 346 M*/ 347 348 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 349 { 350 TAO_BNCG *cg; 351 const char *morethuente_type = TAOLINESEARCHMT; 352 PetscErrorCode ierr; 353 354 PetscFunctionBegin; 355 tao->ops->setup = TaoSetUp_BNCG; 356 tao->ops->solve = TaoSolve_BNCG; 357 tao->ops->view = TaoView_BNCG; 358 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 359 tao->ops->destroy = TaoDestroy_BNCG; 360 361 /* Override default settings (unless already changed) */ 362 if (!tao->max_it_changed) tao->max_it = 2000; 363 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 364 365 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 366 /* method. In particular, gtol should be less that 0.5; the value used in */ 367 /* Nocedal and Wright is 0.10. We use the default values for the */ 368 /* linesearch because it seems to work better. */ 369 ierr = TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch);CHKERRQ(ierr); 370 ierr = PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1);CHKERRQ(ierr); 371 ierr = TaoLineSearchSetType(tao->linesearch, morethuente_type);CHKERRQ(ierr); 372 ierr = TaoLineSearchUseTaoRoutines(tao->linesearch, tao);CHKERRQ(ierr); 373 ierr = TaoLineSearchSetOptionsPrefix(tao->linesearch,tao->hdr.prefix);CHKERRQ(ierr); 374 375 ierr = PetscNewLog(tao,&cg);CHKERRQ(ierr); 376 tao->data = (void*)cg; 377 378 cg->pow = 2.1; 379 cg->eta = 0.5; 380 cg->dynamic_restart = PETSC_FALSE; 381 cg->unscaled_restart = PETSC_FALSE; 382 cg->theta = 1.0; 383 cg->hz_theta = 1.0; 384 cg->dfp_scale = 1.0; 385 cg->bfgs_scale = 1.0; 386 cg->gamma = 0.4; 387 cg->zeta = 0.5; 388 cg->min_quad = 3; 389 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 390 cg->xi = 1.0; 391 cg->neg_xi = PETSC_FALSE; 392 cg->spaced_restart = PETSC_FALSE; 393 cg->tol_quad = 1e-8; 394 cg->as_step = 0.001; 395 cg->as_tol = 0.001; 396 cg->epsilon = PETSC_MACHINE_EPSILON; 397 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); 398 cg->as_type = CG_AS_BERTSEKAS; 399 cg->cg_type = CG_SSML_BFGS; 400 cg->recycle = PETSC_FALSE; 401 cg->alpha = 1.0; 402 cg->rho = 1.0; 403 cg->beta = 0.5; /* Default a la Alp */ 404 cg->diag_scaling = PETSC_TRUE; 405 cg->inv_sig = PETSC_FALSE; 406 PetscFunctionReturn(0); 407 } 408 409 PetscErrorCode TaoBNCGComputeDiagScaling(Tao tao, PetscReal yts, PetscReal yty){ 410 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 411 PetscErrorCode ierr; 412 PetscScalar ytDy, ytDs, stDs; 413 PetscReal sigma; 414 415 PetscFunctionBegin; 416 /* W = Hy */ 417 ierr = VecPointwiseMult(cg->W, cg->invD, cg->yk);CHKERRQ(ierr); 418 ierr = VecDot(cg->W, cg->yk, &ytDy);CHKERRQ(ierr); 419 /* Compute Hadamard product V = s*s^T */ 420 ierr = VecPointwiseMult(cg->V, cg->sk, cg->sk);CHKERRQ(ierr); 421 /* Compute the BFGS contribution bfgs_work */ 422 /* first construct sDy - Hadamard product */ 423 ierr = VecPointwiseMult(cg->bfgs_work, cg->sk, cg->W); CHKERRQ(ierr); 424 /* Now assemble the BFGS component in there - denom of yts added later */ 425 ierr = VecAXPBY(cg->bfgs_work, ytDy/(yts), -2.0, cg->V); CHKERRQ(ierr); 426 /* Start assembling the new inverse diagonal, the pure DFP component - denom of ytDy added later */ 427 /* Compute Hadamard product U = (Hy)*(Hy)^T */ 428 ierr = VecPointwiseMult(cg->U, cg->W, cg->W); CHKERRQ(ierr); 429 /* D_{k+1} = D_k + V/yts + (1-theta)*BFGS + theta*DFP */ 430 ierr = VecCopy(cg->invD, cg->invDnew); CHKERRQ(ierr); 431 /* The factors I left out in BFGS and DFP */ 432 ierr = VecAXPBYPCZ(cg->invDnew, (1-cg->theta)/yts, -cg->theta/(ytDy), 1.0, cg->bfgs_work, cg->U); CHKERRQ(ierr); 433 ierr = VecAXPY(cg->invDnew, 1/cg->yts, cg->V); 434 /* Ensure positive definite */ 435 ierr = VecAbs(cg->invDnew); CHKERRQ(ierr); 436 /* Start with re-scaling on the newly computed diagonal */ 437 /* Compute y^T H^{2*beta} y */ 438 /* Note that VecPow has special cases tabulated for +-1.0, +-0.5, 0.0, and 2.0 */ 439 ierr = VecCopy(cg->invDnew, cg->work);CHKERRQ(ierr); 440 ierr = VecPow(cg->work, 2.0*cg->beta);CHKERRQ(ierr); 441 ierr = VecPointwiseMult(cg->work, cg->work, cg->yk);CHKERRQ(ierr); 442 ierr = VecDot(cg->yk, cg->work, &ytDy);CHKERRQ(ierr); 443 /* Compute y^T H^{2*beta - 1} s */ 444 ierr = VecCopy(cg->invDnew, cg->work);CHKERRQ(ierr); 445 ierr = VecPow(cg->work, 2.0*cg->beta - 1.0);CHKERRQ(ierr); 446 ierr = VecPointwiseMult(cg->work, cg->work, cg->sk);CHKERRQ(ierr); 447 ierr = VecDot(cg->yk, cg->work, &ytDs);CHKERRQ(ierr); 448 /* Compute s^T H^{2*beta - 2} s */ 449 ierr = VecCopy(cg->invDnew, cg->work);CHKERRQ(ierr); 450 ierr = VecPow(cg->work, 2.0*cg->beta - 2.0);CHKERRQ(ierr); 451 ierr = VecPointwiseMult(cg->work, cg->work, cg->sk);CHKERRQ(ierr); 452 ierr = VecDot(cg->sk, cg->work, &stDs);CHKERRQ(ierr); 453 /* Compute the diagonal scaling */ 454 sigma = 0.0; 455 ierr = TaoBNCGComputeScalarScaling(ytDy, ytDs, stDs, &sigma, cg->alpha); 456 457 /* If Q has small values, then Q^(r_beta - 1) */ 458 /* can have very large values. Hence, ys_sum */ 459 /* and ss_sum can be infinity. In this case, */ 460 /* sigma can either be not-a-number or infinity. */ 461 462 if (PetscIsInfOrNanReal(sigma)) { 463 /* sigma is not-a-number; skip rescaling */ 464 } else if (!sigma) { 465 /* sigma is zero; this is a bad case; skip rescaling */ 466 } else { 467 /* sigma is positive */ 468 ierr = VecScale(cg->invDnew, sigma);CHKERRQ(ierr); 469 } 470 471 /* Combine the old diagonal and the new diagonal using a convex limiter */ 472 if (cg->rho == 1.0) { 473 ierr = VecCopy(cg->invDnew, cg->invD);CHKERRQ(ierr); 474 } else if (cg->rho) { 475 ierr = VecAXPBY(cg->invD, 1.0-cg->rho, cg->rho, cg->invDnew);CHKERRQ(ierr); 476 } 477 PetscFunctionReturn(0); 478 } 479 480 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) 481 { 482 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 483 PetscErrorCode ierr; 484 PetscReal scaling; 485 486 PetscFunctionBegin; 487 ++cg->resets; 488 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 489 scaling = PetscMin(100.0, PetscMax(1e-7, scaling)); 490 if (cg->unscaled_restart) scaling = 1.0; 491 ierr = VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient);CHKERRQ(ierr); 492 /* Also want to reset our diagonal scaling with each restart */ 493 if (cg->diag_scaling) { 494 ierr = VecSet(cg->invD, 1.0);CHKERRQ(ierr); 495 } 496 497 498 PetscFunctionReturn(0); 499 } 500 501 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) 502 { 503 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 504 PetscReal quadinterp; 505 506 PetscFunctionBegin; 507 if (cg->f < cg->min_quad/10) {*dynrestart = PETSC_FALSE; PetscFunctionReturn(0);} /* just skip this since this strategy doesn't work well for functions near zero */ 508 quadinterp = 2*(cg->f - fold)/(stepsize*(gd + gd_old)); 509 if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) cg->iter_quad++; 510 else { 511 cg->iter_quad = 0; 512 *dynrestart = PETSC_FALSE; 513 } 514 515 if (cg->iter_quad >= cg->min_quad) { 516 cg->iter_quad = 0; 517 *dynrestart = PETSC_TRUE; 518 } 519 520 PetscFunctionReturn(0); 521 } 522 523 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscBool cg_restart, PetscReal dnorm, PetscReal ginner){ 524 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 525 PetscErrorCode ierr; 526 PetscReal gamma, tau_k, beta; 527 PetscReal tmp, ynorm, ynorm2, snorm, dk_yk, gd; 528 PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk; 529 PetscInt dim; 530 531 PetscFunctionBegin; 532 533 /* Want to implement P.C. versions eventually */ 534 /* Compute CG step */ 535 if (cg_restart) { 536 beta = 0.0; 537 ++cg->resets; 538 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, tao->gradient);CHKERRQ(ierr); 539 } else { 540 switch (cg->cg_type) { 541 case CG_GradientDescent: 542 beta = 0.0; 543 ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, tao->gradient);CHKERRQ(ierr); 544 break; 545 546 case CG_HestenesStiefel: 547 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 548 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 549 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient); CHKERRQ(ierr); 550 ierr = VecNorm(cg->yk, NORM_2, &ynorm); CHKERRQ(ierr); 551 ynorm2 = ynorm*ynorm; 552 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk); CHKERRQ(ierr); 553 //tau_k = step*dk_yk/ynorm2; 554 //beta = tau_k*(gnorm2 - ginner) / (gd - gd_old); 555 beta = (gnorm2 - ginner) / (gd - gd_old); 556 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, tao->gradient);CHKERRQ(ierr); 557 break; 558 case CG_FletcherReeves: 559 beta = gnorm2 / gnorm2_old; 560 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, tao->gradient);CHKERRQ(ierr); 561 break; 562 case CG_PolakRibiere: 563 beta = (gnorm2 - ginner) / gnorm2_old; 564 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, tao->gradient);CHKERRQ(ierr); 565 break; 566 case CG_PolakRibierePlus: 567 beta = PetscMax((gnorm2-ginner)/gnorm2_old, 0.0); 568 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, tao->gradient);CHKERRQ(ierr); 569 break; 570 case CG_DaiYuan: 571 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 572 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 573 beta = gnorm2 / (gd - gd_old); 574 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, tao->gradient);CHKERRQ(ierr); 575 break; 576 case CG_SSML_BFGS: 577 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 578 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient); CHKERRQ(ierr); 579 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution); CHKERRQ(ierr); 580 ierr = VecNorm(cg->yk, NORM_2, &ynorm); CHKERRQ(ierr); 581 ierr = VecNorm(cg->sk, NORM_2, &snorm); CHKERRQ(ierr); 582 ynorm2 = ynorm*ynorm; 583 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk); CHKERRQ(ierr); 584 ierr = VecDot(cg->yk, cg->sk, &cg->yts); CHKERRQ(ierr); 585 cg->yty = ynorm2; 586 cg->sts = snorm*snorm; 587 /* TODO: Should only need one of dk_yk and yts */ 588 if (ynorm2 < cg->epsilon){ 589 /* The gradient hasn't changed, so we should stay in the same direction as before. Don't update it or anything.*/ 590 if (snorm < cg->eps_23){ 591 /* We're making no progress. Scaled gradient descent step.*/ 592 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 593 } 594 } else { 595 if (!cg->diag_scaling){ 596 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk); CHKERRQ(ierr); 597 ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha); CHKERRQ(ierr); 598 tmp = gd/dk_yk; 599 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) - (step/tau_k)*gd/dk_yk); 600 /* d <- -t*g + beta*t*d + t*tmp*yk */ 601 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk); CHKERRQ(ierr); 602 } 603 else if (snorm < cg->epsilon || cg->yts < cg->epsilon) { 604 /* We're making no progress. Scaled gradient descent step and reset diagonal scaling */ 605 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 606 } else { 607 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 608 /* Compute the invD vector */ 609 ierr = TaoBNCGComputeDiagScaling(tao, cg->yts, ynorm2); CHKERRQ(ierr); 610 /* Apply the invD scaling to all my vectors */ 611 ierr = VecPointwiseMult(cg->g_work, cg->invD, tao->gradient); CHKERRQ(ierr); 612 ierr = VecPointwiseMult(cg->d_work, cg->invD, tao->stepdirection); CHKERRQ(ierr); 613 ierr = VecPointwiseMult(cg->y_work, cg->invD, cg->yk); CHKERRQ(ierr); 614 /* Construct the constant ytDgkp1 */ 615 ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk); CHKERRQ(ierr); 616 /* Construct the constant for scaling Dkyk in the update */ 617 tmp = gd/dk_yk; 618 /* tau_bfgs can be a parameter we tune via command line in future versions. For now, just setting to one. May instead put this inside the ComputeDiagonalScaling function... */ 619 cg->tau_bfgs = 1.0; 620 /* tau_k = -ytDy/(ytd)^2 * gd */ 621 ierr = VecDot(cg->yk, cg->y_work, &tau_k); 622 tau_k = -tau_k*gd/(dk_yk*dk_yk); 623 /* beta is the constant which adds the dk contribution */ 624 beta = cg->tau_bfgs*gkp1_yk/dk_yk - step*tmp + tau_k; 625 /* Do the update in two steps */ 626 ierr = VecAXPBY(tao->stepdirection, -cg->tau_bfgs, beta, cg->g_work); CHKERRQ(ierr); 627 ierr = VecAXPY(tao->stepdirection, tmp, cg->y_work); CHKERRQ(ierr); 628 }} 629 break; 630 case CG_SSML_DFP: 631 632 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 633 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient); CHKERRQ(ierr); 634 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution); CHKERRQ(ierr); 635 ierr = VecNorm(cg->yk, NORM_2, &ynorm); CHKERRQ(ierr); 636 ierr = VecNorm(cg->sk, NORM_2, &snorm); CHKERRQ(ierr); 637 ynorm2 = ynorm*ynorm; 638 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk); CHKERRQ(ierr); 639 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk); CHKERRQ(ierr); 640 if (ynorm < cg->epsilon){ 641 /* The gradient hasn't changed, so we should stay in the same direction as before. Don't update it or anything.*/ 642 if (snorm < cg->epsilon){ 643 /* We're making no progress. Gradient descent step. Not scaled by tau_k since it's' is 0 or NaN in this case.*/ 644 ierr = TaoBNCGResetUpdate(tao, gnorm2); 645 } 646 } else { 647 648 tau_k = cg->dfp_scale*snorm*snorm/(step*dk_yk); 649 tmp = tau_k*gkp1_yk/ynorm2; 650 beta = -step*gd/dk_yk; 651 652 /* d <- -t*g + beta*d + tmp*yk */ 653 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk); CHKERRQ(ierr); 654 } 655 break; 656 case CG_SSML_BROYDEN: 657 658 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 659 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient); CHKERRQ(ierr); 660 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution); CHKERRQ(ierr); 661 ierr = VecNorm(cg->yk, NORM_2, &ynorm); CHKERRQ(ierr); 662 ierr = VecNorm(cg->sk, NORM_2, &snorm); CHKERRQ(ierr); 663 ynorm2 = ynorm*ynorm; 664 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk); CHKERRQ(ierr); 665 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk); CHKERRQ(ierr); 666 667 if (ynorm < cg->epsilon){ 668 /* The gradient hasn't changed, so we should stay in the same direction as before. Don't update it or anything.*/ 669 if (snorm < cg->epsilon){ 670 /* We're making no progress. Gradient descent step.*/ 671 ierr = TaoBNCGResetUpdate(tao, gnorm2); 672 } 673 } else { 674 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 675 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale); 676 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale); 677 tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp; 678 679 /* Used for the gradient */ 680 /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0, it should reproduce the tau_dfp scaling. Same with dfp_scale. */ 681 tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/ynorm2; 682 beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 683 /* d <- -t*g + beta*d + tmp*yk */ 684 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk); CHKERRQ(ierr); 685 } 686 break; 687 688 case CG_HagerZhang: 689 /* Their 2006 paper. Comes from deleting the y_k term from SSML_BFGS, introducing a theta parameter, and using a cutoff for beta. See DK 2013 pg. 315 for a review of CG_DESCENT 5.3 */ 690 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 691 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 692 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient); CHKERRQ(ierr); 693 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution); CHKERRQ(ierr); 694 ierr = VecNorm(cg->yk, NORM_2, &ynorm); CHKERRQ(ierr); 695 ierr = VecNorm(cg->sk, NORM_2, &snorm); CHKERRQ(ierr); 696 ynorm2 = ynorm*ynorm; 697 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk); CHKERRQ(ierr); 698 ierr = VecDot(cg->yk, cg->sk, &cg->yts); CHKERRQ(ierr); 699 if (cg->use_dynamic_restart){ 700 ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold); CHKERRQ(ierr); 701 } 702 if (ynorm2 < cg->epsilon){ 703 /* The gradient hasn't changed, so we should stay in the same direction as before. Don't update it or anything.*/ 704 if (snorm < cg->eps_23){ 705 /* We're making no progress. Scaled gradient descent step.*/ 706 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 707 } 708 } else if (cg->dynamic_restart){ 709 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 710 } else if (tao->niter % (cg->min_restart_num*dim) == 0 && cg->spaced_restart){ 711 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 712 } else { 713 if (!cg->diag_scaling){ 714 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk); CHKERRQ(ierr); 715 ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha); CHKERRQ(ierr); 716 /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */ 717 tmp = gd/dk_yk; 718 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)); 719 /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */ 720 beta = PetscMax(PetscMax(beta, 0.4*tau_k*gd_old/(dnorm*dnorm)), 0.5*tau_k*gd/(dnorm*dnorm)); 721 /* d <- -t*g + beta*t*d */ 722 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk); CHKERRQ(ierr); 723 } 724 else if (snorm < cg->epsilon || cg->yts < cg->epsilon) { 725 /* We're making no progress. Scaled gradient descent step and reset diagonal scaling */ 726 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 727 } else { 728 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 729 cg->yty = ynorm2; 730 cg->sts = snorm*snorm; 731 /* Compute the invD vector */ 732 ierr = TaoBNCGComputeDiagScaling(tao, cg->yts, ynorm2); CHKERRQ(ierr); 733 /* Apply the invD scaling to all my vectors */ 734 ierr = VecPointwiseMult(cg->g_work, cg->invD, tao->gradient); CHKERRQ(ierr); 735 ierr = VecPointwiseMult(cg->d_work, cg->invD, tao->stepdirection); CHKERRQ(ierr); 736 ierr = VecPointwiseMult(cg->y_work, cg->invD, cg->yk); CHKERRQ(ierr); 737 /* Construct the constant ytDgkp1 */ 738 ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk); CHKERRQ(ierr); 739 /* Construct the constant for scaling Dkyk in the update */ 740 tmp = gd/dk_yk; 741 /* tau_bfgs can be a parameter we tune via command line in future versions. For now, just setting to one. May instead put this inside the ComputeDiagonalScaling function... */ 742 cg->tau_bfgs = 1.0; 743 ierr = VecDot(cg->yk, cg->y_work, &tau_k); 744 tau_k = -tau_k*gd/(dk_yk*dk_yk); 745 /* beta is the constant which adds the dk contribution */ 746 beta = cg->tau_bfgs*gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */ 747 /* From HZ2013, modified to account for diagonal scaling*/ 748 ierr = VecDot(cg->G_old, cg->d_work, &gd_old); 749 ierr = VecDot(tao->stepdirection, cg->g_work, &gd); 750 beta = PetscMax(PetscMax(beta, 0.4*gd_old/(dnorm*dnorm)), 0.5*gd/(dnorm*dnorm)); 751 /* Do the update */ 752 ierr = VecAXPBY(tao->stepdirection, -cg->tau_bfgs, beta, cg->g_work); CHKERRQ(ierr); 753 }} 754 break; 755 case CG_DaiKou: 756 /* 2013 paper. */ 757 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 758 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 759 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient); CHKERRQ(ierr); 760 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution); CHKERRQ(ierr); 761 ierr = VecNorm(cg->yk, NORM_2, &ynorm); CHKERRQ(ierr); 762 ierr = VecNorm(cg->sk, NORM_2, &snorm); CHKERRQ(ierr); 763 ynorm2 = ynorm*ynorm; 764 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk); CHKERRQ(ierr); 765 ierr = VecDot(cg->yk, cg->sk, &cg->yts); CHKERRQ(ierr); 766 /* TODO: Should only need one of dk_yk and yts */ 767 if (ynorm2 < cg->epsilon){ 768 /* The gradient hasn't changed, so we should stay in the same direction as before. Don't update it or anything.*/ 769 if (snorm < cg->eps_23){ 770 /* We're making no progress. Scaled gradient descent step.*/ 771 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 772 } 773 } else { 774 if (!cg->diag_scaling){ 775 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk); CHKERRQ(ierr); 776 ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha); 777 /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */ 778 tmp = gd/dk_yk; 779 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) - (step/tau_k)*gd/dk_yk + gd/(dnorm*dnorm)); 780 beta = PetscMax(PetscMax(beta, 0.4*tau_k*gd_old/(dnorm*dnorm)), 0.5*tau_k*gd/(dnorm*dnorm)); 781 /* d <- -t*g + beta*t*d */ 782 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk); CHKERRQ(ierr); 783 } 784 else if (snorm < cg->epsilon || cg->yts < cg->epsilon) { 785 /* We're making no progress. Scaled gradient descent step and reset diagonal scaling */ 786 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 787 } else { 788 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 789 cg->yty = ynorm2; 790 cg->sts = snorm*snorm; 791 /* Compute the invD vector */ 792 ierr = TaoBNCGComputeDiagScaling(tao, cg->yts, ynorm2); CHKERRQ(ierr); 793 /* Apply the invD scaling to all my vectors */ 794 ierr = VecPointwiseMult(cg->g_work, cg->invD, tao->gradient); CHKERRQ(ierr); 795 ierr = VecPointwiseMult(cg->d_work, cg->invD, tao->stepdirection); CHKERRQ(ierr); 796 ierr = VecPointwiseMult(cg->y_work, cg->invD, cg->yk); CHKERRQ(ierr); 797 /* Construct the constant ytDgkp1 */ 798 ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk); CHKERRQ(ierr); 799 /* tau_bfgs can be a parameter we tune via command line in future versions. For now, just setting to one. May instead put this inside the ComputeDiagonalScaling function... */ 800 cg->tau_bfgs = 1.0; 801 ierr = VecDot(cg->yk, cg->y_work, &tau_k); 802 tau_k = tau_k*gd/(dk_yk*dk_yk); 803 tmp = gd/dk_yk; 804 /* beta is the constant which adds the dk contribution */ 805 beta = cg->tau_bfgs*gkp1_yk/dk_yk - step*tmp - tau_k; 806 807 /* Update this for the last term in beta */ 808 ierr = VecDot(cg->y_work, tao->stepdirection, &dk_yk); CHKERRQ(ierr); 809 beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */ 810 ierr = VecDot(tao->stepdirection, cg->g_work, &gd); 811 ierr = VecDot(cg->G_old, cg->d_work, &gd_old); 812 beta = PetscMax(PetscMax(beta, 0.4*gd_old/(dnorm*dnorm)), 0.5*gd/(dnorm*dnorm)); 813 /* TODO: need to change these hardcoded constants into user parameters */ 814 /* Do the update */ 815 ierr = VecAXPBY(tao->stepdirection, -cg->tau_bfgs, beta, cg->g_work); CHKERRQ(ierr); 816 }} 817 break; 818 819 case CG_KouDai: 820 821 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 822 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 823 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient); CHKERRQ(ierr); 824 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution); CHKERRQ(ierr); 825 ierr = VecNorm(cg->yk, NORM_2, &ynorm); CHKERRQ(ierr); 826 ierr = VecNorm(cg->sk, NORM_2, &snorm); CHKERRQ(ierr); 827 ynorm2 = ynorm*ynorm; 828 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk); CHKERRQ(ierr); 829 ierr = VecDot(cg->yk, cg->sk, &cg->yts); CHKERRQ(ierr); 830 ierr = VecGetSize(tao->gradient, &dim); CHKERRQ(ierr); 831 /* TODO: Should only need one of dk_yk and yts */ 832 if (cg->use_dynamic_restart){ 833 ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold); CHKERRQ(ierr); 834 } 835 if (ynorm2 < cg->epsilon){ 836 /* The gradient hasn't changed, so we should stay in the same direction as before. Don't update it or anything.*/ 837 if (snorm < cg->eps_23){ 838 /* We're making no progress. Scaled gradient descent step.*/ 839 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 840 } 841 } else if (cg->dynamic_restart){ 842 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 843 } else if (tao->niter % (cg->min_restart_num*dim) == 0 && cg->spaced_restart){ 844 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 845 } else { 846 if (!cg->diag_scaling){ 847 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk); CHKERRQ(ierr); 848 ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha); CHKERRQ(ierr); 849 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 850 if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) /* 0.1 is KD's zeta parameter */ 851 { 852 beta = cg->zeta*tau_k*gd/(dnorm*dnorm); 853 gamma = 0.0; 854 } else { 855 if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk; 856 /* This seems to be very effective when there's no tau_k scaling... this guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */ 857 else gamma = cg->xi*gd/dk_yk; 858 } 859 /* d <- -t*g + beta*t*d + t*tmp*yk */ 860 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk); CHKERRQ(ierr); 861 } 862 else if (snorm < cg->epsilon || cg->yts < cg->epsilon) { 863 /* We're making no progress. Scaled gradient descent step and reset diagonal scaling */ 864 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 865 } else { 866 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 867 cg->yty = ynorm2; 868 cg->sts = snorm*snorm; 869 /* Compute the invD vector */ 870 ierr = TaoBNCGComputeDiagScaling(tao, cg->yts, ynorm2); CHKERRQ(ierr); 871 /* Apply the invD scaling to all my vectors */ 872 ierr = VecPointwiseMult(cg->g_work, cg->invD, tao->gradient); CHKERRQ(ierr); 873 /* ierr = VecPointwiseMult(cg->d_work, cg->invD, tao->stepdirection); CHKERRQ(ierr); */ 874 ierr = VecPointwiseMult(cg->y_work, cg->invD, cg->yk); CHKERRQ(ierr); 875 /* Construct the constant ytDgkp1 */ 876 ierr = VecDot(cg->yk, cg->g_work, &gkp1D_yk); CHKERRQ(ierr); 877 /* Construct the constant for scaling Dkyk in the update */ 878 gamma = gd/dk_yk; 879 /* tau_k = -ytDy/(ytd)^2 * gd */ 880 ierr = VecDot(cg->yk, cg->y_work, &tau_k); 881 tau_k = tau_k*gd/(dk_yk*dk_yk); 882 /* beta is the constant which adds the d_k contribution */ 883 beta = gkp1D_yk/dk_yk - step*gamma - tau_k; 884 /* Here is the requisite check */ 885 ierr = VecDot(tao->stepdirection, cg->g_work, &tmp); 886 if (cg->neg_xi){ 887 /* modified KD implementation */ 888 if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk; 889 else gamma = cg->xi*gd/dk_yk; 890 if (beta < cg->zeta*tmp/(dnorm*dnorm)){ 891 beta = cg->zeta*tmp/(dnorm*dnorm); 892 gamma = 0.0; 893 } 894 } else { /* original KD 2015 implementation */ 895 if (beta < cg->zeta*tmp/(dnorm*dnorm)) { 896 beta = cg->zeta*tmp/(dnorm*dnorm); 897 gamma = 0.0; 898 } else { 899 gamma = cg->xi*gd/dk_yk; 900 } 901 } 902 /* Do the update in two steps */ 903 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work); CHKERRQ(ierr); 904 ierr = VecAXPY(tao->stepdirection, gamma, cg->y_work); CHKERRQ(ierr); 905 }} 906 break; 907 case CG_BFGS_Mod: 908 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 909 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient); CHKERRQ(ierr); 910 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution); CHKERRQ(ierr); 911 ierr = VecNorm(cg->yk, NORM_2, &ynorm); CHKERRQ(ierr); 912 ierr = VecNorm(cg->sk, NORM_2, &snorm); CHKERRQ(ierr); 913 ynorm2 = ynorm*ynorm; 914 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk); CHKERRQ(ierr); 915 ierr = VecDot(cg->yk, cg->sk, &cg->yts); CHKERRQ(ierr); 916 /* TODO: Should only need one of dk_yk and yts */ 917 if (ynorm2 < cg->epsilon){ 918 /* The gradient hasn't changed, so we should stay in the same direction as before. Don't update it or anything.*/ 919 if (snorm < cg->eps_23){ 920 /* We're making no progress. Scaled gradient descent step.*/ 921 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 922 } 923 } else { 924 if (!cg->diag_scaling){ 925 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk); CHKERRQ(ierr); 926 ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha); CHKERRQ(ierr); 927 tmp = gd/dk_yk; 928 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) - (step/tau_k)*gd/dk_yk); 929 /* d <- -t*g + beta*t*d + t*tmp*yk */ 930 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk); CHKERRQ(ierr); 931 } 932 else if (snorm < cg->epsilon || cg->yts < cg->epsilon) { 933 /* We're making no progress. Scaled gradient descent step and reset diagonal scaling */ 934 ierr = TaoBNCGResetUpdate(tao, gnorm2); CHKERRQ(ierr); 935 } else { 936 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 937 cg->yty = ynorm2; 938 cg->sts = snorm*snorm; 939 /* Compute the invD vector */ 940 ierr = TaoBNCGComputeDiagScaling(tao, cg->yts, ynorm2); CHKERRQ(ierr); 941 /* Apply the invD scaling to all my vectors */ 942 ierr = VecPointwiseMult(cg->g_work, cg->invD, tao->gradient); CHKERRQ(ierr); 943 ierr = VecPointwiseMult(cg->d_work, cg->invD, tao->stepdirection); CHKERRQ(ierr); 944 ierr = VecPointwiseMult(cg->y_work, cg->invD, cg->yk); CHKERRQ(ierr); 945 /* Construct the constant ytDgkp1 */ 946 ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk); CHKERRQ(ierr); 947 /* Construct the constant for scaling Dkyk in the update */ 948 tmp = gd/dk_yk; 949 /* tau_bfgs can be a parameter we tune via command line in future versions. For now, just setting to one. May instead put this inside the ComputeDiagonalScaling function... */ 950 cg->tau_bfgs = 1.0; 951 /* tau_k = -ytDy/(ytd)^2 * gd */ 952 ierr = VecDot(cg->yk, cg->y_work, &tau_k); 953 tau_k = -tau_k*gd/(dk_yk*dk_yk); 954 /* beta is the constant which adds the dk contribution */ 955 beta = cg->tau_bfgs*gkp1_yk/dk_yk - step*tmp + tau_k; 956 /* Do the update in two steps */ 957 ierr = VecAXPBY(tao->stepdirection, -cg->tau_bfgs, beta, cg->g_work); CHKERRQ(ierr); 958 ierr = VecAXPY(tao->stepdirection, tmp, cg->y_work); CHKERRQ(ierr); 959 }} 960 default: 961 beta = 0.0; 962 break; 963 } 964 } 965 PetscFunctionReturn(0); 966 } 967 968 PETSC_INTERN PetscErrorCode TaoBNCGSteffensonAcceleration(Tao tao){ 969 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 970 PetscErrorCode ierr; 971 PetscReal mag1, mag2; 972 PetscReal resnorm; 973 PetscReal steff_f; 974 PetscFunctionBegin; 975 if (tao->niter > 2 && tao->niter % 2 == 0){ 976 ierr = VecCopy(cg->steffnp1, cg->steffnm1); CHKERRQ(ierr); /* X_np1 to X_nm1 since it's been two iterations*/ 977 ierr = VecCopy(cg->X_old, cg->steffn); CHKERRQ(ierr); /* Get X_n */ 978 ierr = VecCopy(tao->solution, cg->steffnp1); CHKERRQ(ierr); 979 980 /* Begin step 4 */ 981 ierr = VecCopy(cg->steffnm1, cg->W); CHKERRQ(ierr); 982 ierr = VecAXPBY(cg->W, 1.0, -1.0, cg->steffn); CHKERRQ(ierr); 983 ierr = VecDot(cg->W, cg->W, &mag1); 984 ierr = VecAXPBYPCZ(cg->W, -1.0, 1.0, -1.0, cg->steffn, cg->steffnp1); CHKERRQ(ierr); 985 ierr = VecDot(cg->W, cg->W, &mag2); 986 ierr = VecCopy(cg->steffnm1, cg->steffva); CHKERRQ(ierr); 987 ierr = VecAXPY(cg->steffva, -mag1/mag2, cg->W); CHKERRQ(ierr); 988 989 ierr = TaoComputeObjectiveAndGradient(tao, cg->steffva, &steff_f, cg->g_work); CHKERRQ(ierr); 990 991 /* Check if the accelerated point has converged*/ 992 ierr = VecFischer(cg->steffva, cg->g_work, tao->XL, tao->XU, cg->W);CHKERRQ(ierr); 993 ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr); 994 if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); 995 //ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr); 996 //ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr); 997 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 998 ierr = PetscPrintf(PETSC_COMM_SELF, "va f: %20.19e\t va gnorm: %20.19e\n", steff_f, resnorm); 999 1000 } else if (tao->niter == 2){ 1001 ierr = VecCopy(tao->solution, cg->steffnp1); CHKERRQ(ierr); 1002 mag1 = cg->sts; /* = |x1 - x0|^2 */ 1003 ierr = VecCopy(cg->steffnm1, cg->steffvatmp); CHKERRQ(ierr); 1004 ierr = VecAXPBYPCZ(cg->steffva, 1.0, -1.0, 0.0, cg->steffnp1, cg->steffn); 1005 ierr = VecAXPBYPCZ(cg->steffva, 1.0, -1.0, 1.0, cg->steffnm1, cg->steffn); 1006 ierr = VecNorm(cg->steffva, NORM_2, &mag2); CHKERRQ(ierr); 1007 mag2 = mag2*mag2; 1008 ierr = VecAXPBY(cg->steffva, 1.0, -mag1/mag2, cg->steffvatmp); CHKERRQ(ierr); 1009 // finished step 2 1010 } else if (tao->niter == 1){ 1011 ierr = VecCopy(cg->X_old, cg->steffnm1); CHKERRQ(ierr); 1012 ierr = VecCopy(tao->solution, cg->steffn); CHKERRQ(ierr); 1013 } 1014 /* Now have step 2 done of method */ 1015 1016 PetscFunctionReturn(0); 1017 } 1018 1019 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm) 1020 { 1021 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 1022 PetscErrorCode ierr; 1023 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 1024 PetscReal step=1.0,gnorm2,gd,ginner,dnorm; 1025 PetscReal gnorm2_old,f_old,resnorm, gnorm_old; 1026 PetscBool cg_restart, gd_fallback = PETSC_FALSE; 1027 1028 PetscFunctionBegin; 1029 /* We are now going to perform a line search along the direction. */ 1030 ++tao->niter; 1031 1032 /* Store solution and gradient info before it changes */ 1033 ierr = VecCopy(tao->solution, cg->X_old);CHKERRQ(ierr); 1034 ierr = VecCopy(tao->gradient, cg->G_old);CHKERRQ(ierr); 1035 ierr = VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old);CHKERRQ(ierr); 1036 1037 gnorm_old = gnorm; 1038 gnorm2_old = gnorm_old*gnorm_old; 1039 f_old = cg->f; 1040 /* Perform bounded line search */ 1041 ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr); 1042 ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr); 1043 ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); 1044 1045 /* Check linesearch failure */ 1046 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 1047 ++cg->ls_fails; 1048 1049 if (cg->cg_type == CG_GradientDescent || gd_fallback){ 1050 /* Nothing left to do but fail out of the optimization */ 1051 step = 0.0; 1052 tao->reason = TAO_DIVERGED_LS_FAILURE; 1053 } else { 1054 /* Restore previous point */ 1055 ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr); 1056 ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr); 1057 ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr); 1058 1059 gnorm = gnorm_old; 1060 gnorm2 = gnorm2_old; 1061 cg->f = f_old; 1062 1063 /* Fall back on the scaled gradient step */ 1064 ierr = TaoBNCGResetUpdate(tao, gnorm2); 1065 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 1066 1067 ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr); 1068 ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr); 1069 ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); 1070 1071 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){ 1072 /* Nothing left to do but fail out of the optimization */ 1073 cg->ls_fails++; 1074 step = 0.0; 1075 tao->reason = TAO_DIVERGED_LS_FAILURE; 1076 } 1077 } 1078 } 1079 /* Convergence test for line search failure */ 1080 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1081 1082 /* Standard convergence test */ 1083 /* Make sure convergence test is the same. */ 1084 ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr); 1085 ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr); 1086 if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN"); 1087 ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr); 1088 ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr); 1089 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 1090 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1091 1092 /* Assert we have an updated step and we need at least one more iteration. */ 1093 /* Calculate the next direction */ 1094 /* Estimate the active set at the new solution */ 1095 ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr); 1096 /* Compute the projected gradient and its norm */ 1097 ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr); 1098 ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr); 1099 ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); 1100 gnorm2 = gnorm*gnorm; 1101 1102 /* Check restart conditions for using steepest descent */ 1103 cg_restart = PETSC_FALSE; 1104 ierr = VecDot(tao->gradient, cg->G_old, &ginner);CHKERRQ(ierr); 1105 ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr); 1106 1107 ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, cg_restart, dnorm, ginner); CHKERRQ(ierr); 1108 1109 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 1110 1111 if (cg->cg_type != CG_GradientDescent) { 1112 /* Figure out which previously active variables became inactive this iteration */ 1113 ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr); 1114 if (cg->inactive_idx && cg->inactive_old) { 1115 ierr = ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives);CHKERRQ(ierr); 1116 } 1117 /* Selectively reset the CG step those freshly inactive variables */ 1118 if (cg->new_inactives) { 1119 ierr = VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr); 1120 ierr = VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr); 1121 ierr = VecCopy(cg->inactive_grad, cg->inactive_step);CHKERRQ(ierr); 1122 ierr = VecScale(cg->inactive_step, -1.0);CHKERRQ(ierr); 1123 ierr = VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr); 1124 ierr = VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr); 1125 } 1126 1127 /* Verify that this is a descent direction */ 1128 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 1129 ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm); 1130 /* TODO: potentially remove the third check */ 1131 if (gd >= 0 || PetscIsInfOrNanReal(gd) || gd >= -cg->epsilon) { 1132 /* Not a descent direction, so we reset back to projected gradient descent */ 1133 PetscPrintf(PETSC_COMM_SELF, "gd is small or positive: %20.19e\n", gd); 1134 ierr = TaoBNCGResetUpdate(tao, gnorm2); 1135 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 1136 ++cg->descent_error; 1137 gd_fallback = PETSC_TRUE; 1138 } else { 1139 gd_fallback = PETSC_FALSE; 1140 } 1141 } 1142 1143 PetscFunctionReturn(0); 1144 } 1145