1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> 3 #include <petscksp.h> 4 5 #define CG_GradientDescent 0 6 #define CG_HestenesStiefel 1 7 #define CG_FletcherReeves 2 8 #define CG_PolakRibierePolyak 3 9 #define CG_PolakRibierePlus 4 10 #define CG_DaiYuan 5 11 #define CG_HagerZhang 6 12 #define CG_DaiKou 7 13 #define CG_KouDai 8 14 #define CG_SSML_BFGS 9 15 #define CG_SSML_DFP 10 16 #define CG_SSML_BROYDEN 11 17 #define CG_PCGradientDescent 12 18 #define CGTypes 13 19 20 static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"}; 21 22 #define CG_AS_NONE 0 23 #define CG_AS_BERTSEKAS 1 24 #define CG_AS_SIZE 2 25 26 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 27 28 PetscErrorCode TaoBNCGSetRecycleFlag(Tao tao, PetscBool recycle) 29 { 30 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 31 32 PetscFunctionBegin; 33 cg->recycle = recycle; 34 PetscFunctionReturn(0); 35 } 36 37 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) 38 { 39 PetscErrorCode ierr; 40 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 41 42 PetscFunctionBegin; 43 ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr); 44 if (cg->inactive_idx) { 45 ierr = ISDuplicate(cg->inactive_idx, &cg->inactive_old);CHKERRQ(ierr); 46 ierr = ISCopy(cg->inactive_idx, cg->inactive_old);CHKERRQ(ierr); 47 } 48 switch (asType) { 49 case CG_AS_NONE: 50 ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr); 51 ierr = VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx);CHKERRQ(ierr); 52 ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr); 53 ierr = ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx);CHKERRQ(ierr); 54 break; 55 56 case CG_AS_BERTSEKAS: 57 /* Use gradient descent to estimate the active set */ 58 ierr = VecCopy(cg->unprojected_gradient, cg->W);CHKERRQ(ierr); 59 ierr = VecScale(cg->W, -1.0);CHKERRQ(ierr); 60 ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, 61 &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr); 62 break; 63 64 default: 65 break; 66 } 67 PetscFunctionReturn(0); 68 } 69 70 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step) 71 { 72 PetscErrorCode ierr; 73 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 74 75 PetscFunctionBegin; 76 switch (asType) { 77 case CG_AS_NONE: 78 ierr = VecISSet(step, cg->active_idx, 0.0);CHKERRQ(ierr); 79 break; 80 81 case CG_AS_BERTSEKAS: 82 ierr = TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step);CHKERRQ(ierr); 83 break; 84 85 default: 86 break; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 static PetscErrorCode TaoSolve_BNCG(Tao tao) 92 { 93 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 94 PetscErrorCode ierr; 95 PetscReal step=1.0,gnorm,gnorm2, resnorm; 96 PetscInt nDiff; 97 98 PetscFunctionBegin; 99 /* Project the current point onto the feasible set */ 100 ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); 101 ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr); 102 103 /* Project the initial point onto the feasible region */ 104 ierr = TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution);CHKERRQ(ierr); 105 106 if (nDiff > 0 || !cg->recycle){ 107 ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient);CHKERRQ(ierr); 108 } 109 ierr = VecNorm(cg->unprojected_gradient,NORM_2,&gnorm);CHKERRQ(ierr); 110 if (PetscIsInfOrNanReal(cg->f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 111 112 /* Estimate the active set and compute the projected gradient */ 113 ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr); 114 115 /* Project the gradient and calculate the norm */ 116 ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr); 117 ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr); 118 ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); 119 gnorm2 = gnorm*gnorm; 120 121 /* Initialize counters */ 122 tao->niter = 0; 123 cg->ls_fails = cg->descent_error = 0; 124 cg->resets = -1; 125 cg->skipped_updates = cg->pure_gd_steps = 0; 126 cg->iter_quad = 0; 127 128 /* Convergence test at the starting point. */ 129 tao->reason = TAO_CONTINUE_ITERATING; 130 131 ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr); 132 ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr); 133 if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 134 ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr); 135 ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr); 136 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 137 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 138 /* Calculate initial direction. */ 139 if (!cg->recycle) { 140 /* We are not recycling a solution/history from a past TaoSolve */ 141 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 142 } 143 /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */ 144 while (1) { 145 /* Call general purpose update function */ 146 if (tao->ops->update) { 147 ierr = (*tao->ops->update)(tao, tao->niter, tao->user_update);CHKERRQ(ierr); 148 } 149 ierr = TaoBNCGConductIteration(tao, gnorm);CHKERRQ(ierr); 150 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 151 } 152 } 153 154 static PetscErrorCode TaoSetUp_BNCG(Tao tao) 155 { 156 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 157 PetscErrorCode ierr; 158 159 PetscFunctionBegin; 160 if (!tao->gradient) { 161 ierr = VecDuplicate(tao->solution,&tao->gradient);CHKERRQ(ierr); 162 } 163 if (!tao->stepdirection) { 164 ierr = VecDuplicate(tao->solution,&tao->stepdirection);CHKERRQ(ierr); 165 } 166 if (!cg->W) { 167 ierr = VecDuplicate(tao->solution,&cg->W);CHKERRQ(ierr); 168 } 169 if (!cg->work) { 170 ierr = VecDuplicate(tao->solution,&cg->work);CHKERRQ(ierr); 171 } 172 if (!cg->sk) { 173 ierr = VecDuplicate(tao->solution,&cg->sk);CHKERRQ(ierr); 174 } 175 if (!cg->yk) { 176 ierr = VecDuplicate(tao->gradient,&cg->yk);CHKERRQ(ierr); 177 } 178 if (!cg->X_old) { 179 ierr = VecDuplicate(tao->solution,&cg->X_old);CHKERRQ(ierr); 180 } 181 if (!cg->G_old) { 182 ierr = VecDuplicate(tao->gradient,&cg->G_old);CHKERRQ(ierr); 183 } 184 if (cg->diag_scaling){ 185 ierr = VecDuplicate(tao->solution,&cg->d_work);CHKERRQ(ierr); 186 ierr = VecDuplicate(tao->solution,&cg->y_work);CHKERRQ(ierr); 187 ierr = VecDuplicate(tao->solution,&cg->g_work);CHKERRQ(ierr); 188 } 189 if (!cg->unprojected_gradient) { 190 ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient);CHKERRQ(ierr); 191 } 192 if (!cg->unprojected_gradient_old) { 193 ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient_old);CHKERRQ(ierr); 194 } 195 ierr = MatLMVMAllocate(cg->B, cg->sk, cg->yk);CHKERRQ(ierr); 196 if (cg->pc){ 197 ierr = MatLMVMSetJ0(cg->B, cg->pc);CHKERRQ(ierr); 198 } 199 PetscFunctionReturn(0); 200 } 201 202 static PetscErrorCode TaoDestroy_BNCG(Tao tao) 203 { 204 TAO_BNCG *cg = (TAO_BNCG*) tao->data; 205 PetscErrorCode ierr; 206 207 PetscFunctionBegin; 208 if (tao->setupcalled) { 209 ierr = VecDestroy(&cg->W);CHKERRQ(ierr); 210 ierr = VecDestroy(&cg->work);CHKERRQ(ierr); 211 ierr = VecDestroy(&cg->X_old);CHKERRQ(ierr); 212 ierr = VecDestroy(&cg->G_old);CHKERRQ(ierr); 213 ierr = VecDestroy(&cg->unprojected_gradient);CHKERRQ(ierr); 214 ierr = VecDestroy(&cg->unprojected_gradient_old);CHKERRQ(ierr); 215 ierr = VecDestroy(&cg->g_work);CHKERRQ(ierr); 216 ierr = VecDestroy(&cg->d_work);CHKERRQ(ierr); 217 ierr = VecDestroy(&cg->y_work);CHKERRQ(ierr); 218 ierr = VecDestroy(&cg->sk);CHKERRQ(ierr); 219 ierr = VecDestroy(&cg->yk);CHKERRQ(ierr); 220 } 221 ierr = ISDestroy(&cg->active_lower);CHKERRQ(ierr); 222 ierr = ISDestroy(&cg->active_upper);CHKERRQ(ierr); 223 ierr = ISDestroy(&cg->active_fixed);CHKERRQ(ierr); 224 ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr); 225 ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr); 226 ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr); 227 ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr); 228 ierr = MatDestroy(&cg->B);CHKERRQ(ierr); 229 if (cg->pc) { 230 ierr = MatDestroy(&cg->pc);CHKERRQ(ierr); 231 } 232 ierr = PetscFree(tao->data);CHKERRQ(ierr); 233 PetscFunctionReturn(0); 234 } 235 236 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao) 237 { 238 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 239 PetscErrorCode ierr; 240 241 PetscFunctionBegin; 242 ierr = TaoLineSearchSetFromOptions(tao->linesearch);CHKERRQ(ierr); 243 ierr = PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization");CHKERRQ(ierr); 244 ierr = PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr); 245 if (cg->cg_type != CG_SSML_BFGS){ 246 cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */ 247 } 248 if (CG_GradientDescent == cg->cg_type){ 249 cg->cg_type = CG_PCGradientDescent; 250 /* Set scaling equal to none or, at best, scalar scaling. */ 251 cg->unscaled_restart = PETSC_TRUE; 252 cg->diag_scaling = PETSC_FALSE; 253 } 254 ierr = PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr); 255 256 ierr = PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL);CHKERRQ(ierr); 257 ierr = PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL);CHKERRQ(ierr); 258 ierr = PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL);CHKERRQ(ierr); 259 ierr = PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL);CHKERRQ(ierr); 260 ierr = PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL);CHKERRQ(ierr); 261 ierr = PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL);CHKERRQ(ierr); 262 ierr = PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL);CHKERRQ(ierr); 263 ierr = PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL);CHKERRQ(ierr); 264 ierr = PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL);CHKERRQ(ierr); 265 ierr = PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL);CHKERRQ(ierr); 266 ierr = PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL);CHKERRQ(ierr); 267 ierr = PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL);CHKERRQ(ierr); 268 ierr = PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL);CHKERRQ(ierr); 269 ierr = PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL);CHKERRQ(ierr); 270 ierr = PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL);CHKERRQ(ierr); 271 ierr = PetscOptionsBool("-tao_bncg_recycle","enable recycling the existing solution, gradient, and diagonal scaling vector at the start of a new TaoSolve() call","",cg->recycle,&cg->recycle,NULL);CHKERRQ(ierr); 272 ierr = PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL);CHKERRQ(ierr); 273 ierr = PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL);CHKERRQ(ierr); 274 if (cg->no_scaling){ 275 cg->diag_scaling = PETSC_FALSE; 276 cg->alpha = -1.0; 277 } 278 if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling){ /* Some more default options that appear to be good. */ 279 cg->neg_xi = PETSC_TRUE; 280 } 281 ierr = PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL);CHKERRQ(ierr); 282 ierr = PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL);CHKERRQ(ierr); 283 ierr = PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL);CHKERRQ(ierr); 284 ierr = PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL);CHKERRQ(ierr); 285 ierr = PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL);CHKERRQ(ierr); 286 287 ierr = PetscOptionsTail();CHKERRQ(ierr); 288 ierr = MatSetFromOptions(cg->B);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) 293 { 294 PetscBool isascii; 295 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 296 PetscErrorCode ierr; 297 298 PetscFunctionBegin; 299 ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr); 300 if (isascii) { 301 ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr); 302 ierr = PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]);CHKERRQ(ierr); 303 ierr = PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %i\n", cg->skipped_updates);CHKERRQ(ierr); 304 ierr = PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %i\n", cg->resets);CHKERRQ(ierr); 305 ierr = PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %i\n", cg->pure_gd_steps);CHKERRQ(ierr); 306 ierr = PetscViewerASCIIPrintf(viewer, "Not a descent direction: %i\n", cg->descent_error);CHKERRQ(ierr); 307 ierr = PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails);CHKERRQ(ierr); 308 if (cg->diag_scaling){ 309 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr); 310 if (isascii) { 311 ierr = PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr); 312 ierr = MatView(cg->B, viewer);CHKERRQ(ierr); 313 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 314 } 315 } 316 ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr); 317 } 318 PetscFunctionReturn(0); 319 } 320 321 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha) 322 { 323 PetscReal a, b, c, sig1, sig2; 324 325 PetscFunctionBegin; 326 *scale = 0.0; 327 328 if (1.0 == alpha){ 329 *scale = yts/yty; 330 } else if (0.0 == alpha){ 331 *scale = sts/yts; 332 } 333 else if (-1.0 == alpha) *scale = 1.0; 334 else { 335 a = yty; 336 b = yts; 337 c = sts; 338 a *= alpha; 339 b *= -(2.0*alpha - 1.0); 340 c *= alpha - 1.0; 341 sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 342 sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a); 343 /* accept the positive root as the scalar */ 344 if (sig1 > 0.0) { 345 *scale = sig1; 346 } else if (sig2 > 0.0) { 347 *scale = sig2; 348 } else { 349 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar"); 350 } 351 } 352 PetscFunctionReturn(0); 353 } 354 355 /*MC 356 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 357 358 Options Database Keys: 359 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled) 360 . -tao_bncg_eta <r> - restart tolerance 361 . -tao_bncg_type <taocg_type> - cg formula 362 . -tao_bncg_as_type <none,bertsekas> - active set estimation method 363 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation 364 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation 365 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision. 366 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method 367 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 368 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 369 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method 370 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method 371 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method 372 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method 373 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True. 374 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods 375 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts 376 . -tao_bncg_zeta <r> - Scaling parameter in the KD method 377 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps 378 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps 379 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart 380 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem 381 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations 382 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults. 383 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions 384 385 Notes: 386 CG formulas are: 387 + "gd" - Gradient Descent 388 . "fr" - Fletcher-Reeves 389 . "pr" - Polak-Ribiere-Polyak 390 . "prp" - Polak-Ribiere-Plus 391 . "hs" - Hestenes-Steifel 392 . "dy" - Dai-Yuan 393 . "ssml_bfgs" - Self-Scaling Memoryless BFGS 394 . "ssml_dfp" - Self-Scaling Memoryless DFP 395 . "ssml_brdn" - Self-Scaling Memoryless Broyden 396 . "hz" - Hager-Zhang (CG_DESCENT 5.3) 397 . "dk" - Dai-Kou (2013) 398 - "kd" - Kou-Dai (2015) 399 400 Level: beginner 401 M*/ 402 403 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 404 { 405 TAO_BNCG *cg; 406 const char *morethuente_type = TAOLINESEARCHMT; 407 PetscErrorCode ierr; 408 409 PetscFunctionBegin; 410 tao->ops->setup = TaoSetUp_BNCG; 411 tao->ops->solve = TaoSolve_BNCG; 412 tao->ops->view = TaoView_BNCG; 413 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 414 tao->ops->destroy = TaoDestroy_BNCG; 415 416 /* Override default settings (unless already changed) */ 417 if (!tao->max_it_changed) tao->max_it = 2000; 418 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 419 420 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 421 /* method. In particular, gtol should be less that 0.5; the value used in */ 422 /* Nocedal and Wright is 0.10. We use the default values for the */ 423 /* linesearch because it seems to work better. */ 424 ierr = TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch);CHKERRQ(ierr); 425 ierr = PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1);CHKERRQ(ierr); 426 ierr = TaoLineSearchSetType(tao->linesearch, morethuente_type);CHKERRQ(ierr); 427 ierr = TaoLineSearchUseTaoRoutines(tao->linesearch, tao);CHKERRQ(ierr); 428 ierr = TaoLineSearchSetOptionsPrefix(tao->linesearch,tao->hdr.prefix);CHKERRQ(ierr); 429 430 ierr = PetscNewLog(tao,&cg);CHKERRQ(ierr); 431 tao->data = (void*)cg; 432 ierr = KSPInitializePackage();CHKERRQ(ierr); 433 ierr = MatCreate(PetscObjectComm((PetscObject)tao), &cg->B);CHKERRQ(ierr); 434 ierr = PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1);CHKERRQ(ierr); 435 ierr = MatSetOptionsPrefix(cg->B, "tao_bncg_");CHKERRQ(ierr); 436 ierr = MatSetType(cg->B, MATLMVMDIAGBROYDEN);CHKERRQ(ierr); 437 438 cg->pc = NULL; 439 440 cg->dk_eta = 0.5; 441 cg->hz_eta = 0.4; 442 cg->dynamic_restart = PETSC_FALSE; 443 cg->unscaled_restart = PETSC_FALSE; 444 cg->no_scaling = PETSC_FALSE; 445 cg->delta_min = 1e-7; 446 cg->delta_max = 100; 447 cg->theta = 1.0; 448 cg->hz_theta = 1.0; 449 cg->dfp_scale = 1.0; 450 cg->bfgs_scale = 1.0; 451 cg->zeta = 0.1; 452 cg->min_quad = 6; 453 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 454 cg->xi = 1.0; 455 cg->neg_xi = PETSC_TRUE; 456 cg->spaced_restart = PETSC_FALSE; 457 cg->tol_quad = 1e-8; 458 cg->as_step = 0.001; 459 cg->as_tol = 0.001; 460 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/ 461 cg->as_type = CG_AS_BERTSEKAS; 462 cg->cg_type = CG_SSML_BFGS; 463 cg->recycle = PETSC_FALSE; 464 cg->alpha = 1.0; 465 cg->diag_scaling = PETSC_TRUE; 466 PetscFunctionReturn(0); 467 } 468 469 470 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) 471 { 472 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 473 PetscErrorCode ierr; 474 PetscReal scaling; 475 476 PetscFunctionBegin; 477 ++cg->resets; 478 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 479 scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling)); 480 if (cg->unscaled_restart) { 481 scaling = 1.0; 482 ++cg->pure_gd_steps; 483 } 484 ierr = VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient);CHKERRQ(ierr); 485 /* Also want to reset our diagonal scaling with each restart */ 486 if (cg->diag_scaling) { 487 ierr = MatLMVMReset(cg->B, PETSC_FALSE);CHKERRQ(ierr); 488 } 489 PetscFunctionReturn(0); 490 } 491 492 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) 493 { 494 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 495 PetscReal quadinterp; 496 497 PetscFunctionBegin; 498 if (cg->f < cg->min_quad/10) { 499 *dynrestart = PETSC_FALSE; 500 PetscFunctionReturn(0); 501 } /* just skip this since this strategy doesn't work well for functions near zero */ 502 quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old)); 503 if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad; 504 else { 505 cg->iter_quad = 0; 506 *dynrestart = PETSC_FALSE; 507 } 508 if (cg->iter_quad >= cg->min_quad) { 509 cg->iter_quad = 0; 510 *dynrestart = PETSC_TRUE; 511 } 512 PetscFunctionReturn(0); 513 } 514 515 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback) 516 { 517 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 518 PetscErrorCode ierr; 519 PetscReal gamma = 1.0, tau_k, beta; 520 PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk=1.0, gd; 521 PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg; 522 PetscInt dim; 523 PetscBool cg_restart = PETSC_FALSE; 524 PetscFunctionBegin; 525 526 /* Local curvature check to see if we need to restart */ 527 if (tao->niter >= 1 || cg->recycle){ 528 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr); 529 ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr); 530 ynorm2 = ynorm*ynorm; 531 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr); 532 if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON){ 533 cg_restart = PETSC_TRUE; 534 ++cg->skipped_updates; 535 } 536 if (cg->spaced_restart) { 537 ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr); 538 if (tao->niter % (dim*cg->min_restart_num)) cg_restart = PETSC_TRUE; 539 } 540 } 541 /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */ 542 if (cg->spaced_restart){ 543 ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr); 544 if (0 == tao->niter % (6*dim)) cg_restart = PETSC_TRUE; 545 } 546 /* Compute the diagonal scaling vector if applicable */ 547 if (cg->diag_scaling) { 548 ierr = MatLMVMUpdate(cg->B, tao->solution, tao->gradient);CHKERRQ(ierr); 549 } 550 551 /* A note on diagonal scaling (to be added to paper): 552 For the FR, PR, PRP, and DY methods, the diagonally scaled versions 553 must be derived as a preconditioned CG method rather than as 554 a Hessian initialization like in the Broyden methods. */ 555 556 /* In that case, one writes the objective function as 557 f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k). 558 Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to 559 HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the 560 same under preconditioning. Note that A is diagonal, such that A^T = A. */ 561 562 /* This yields questions like what the dot product d_k^T y_k 563 should look like. HZ mistakenly treats that as the same under 564 preconditioning, but that is not necessarily true. */ 565 566 /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation, 567 we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}), 568 yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is 569 NOT the same if our preconditioning matrix is updated between iterations. 570 This same issue is found when considering dot products of the form g_{k+1}^T y_k. */ 571 572 /* Compute CG step direction */ 573 if (cg_restart) { 574 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 575 } else if (pcgd_fallback) { 576 /* Just like preconditioned CG */ 577 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 578 ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr); 579 } else if (ynorm2 > PETSC_MACHINE_EPSILON) { 580 switch (cg->cg_type) { 581 case CG_PCGradientDescent: 582 if (!cg->diag_scaling){ 583 if (!cg->no_scaling){ 584 cg->sts = step*step*dnorm*dnorm; 585 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr); 586 } else { 587 tau_k = 1.0; 588 ++cg->pure_gd_steps; 589 } 590 ierr = VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient);CHKERRQ(ierr); 591 } else { 592 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 593 ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr); 594 } 595 break; 596 597 case CG_HestenesStiefel: 598 /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */ 599 if (!cg->diag_scaling){ 600 cg->sts = step*step*dnorm*dnorm; 601 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 602 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr); 603 beta = tau_k*gkp1_yk/dk_yk; 604 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 605 } else { 606 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 607 ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr); 608 beta = gkp1_yk/dk_yk; 609 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 610 } 611 break; 612 613 case CG_FletcherReeves: 614 ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr); 615 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr); 616 ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr); 617 ynorm2 = ynorm*ynorm; 618 ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr); 619 if (!cg->diag_scaling){ 620 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha);CHKERRQ(ierr); 621 beta = tau_k*gnorm2/gnorm2_old; 622 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 623 } else { 624 ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Before it's updated */ 625 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 626 ierr = VecDot(tao->gradient, cg->g_work, &tmp);CHKERRQ(ierr); 627 beta = tmp/gnorm2_old; 628 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 629 } 630 break; 631 632 case CG_PolakRibierePolyak: 633 snorm = step*dnorm; 634 if (!cg->diag_scaling){ 635 ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr); 636 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 637 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr); 638 beta = tau_k*gkp1_yk/gnorm2_old; 639 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 640 } else { 641 ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); 642 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 643 ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr); 644 beta = gkp1_yk/gnorm2_old; 645 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 646 } 647 break; 648 649 case CG_PolakRibierePlus: 650 ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr); 651 ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr); 652 ynorm2 = ynorm*ynorm; 653 if (!cg->diag_scaling){ 654 ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr); 655 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 656 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr); 657 beta = tau_k*gkp1_yk/gnorm2_old; 658 beta = PetscMax(beta, 0.0); 659 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 660 } else { 661 ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Old gtDg */ 662 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 663 ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr); 664 beta = gkp1_yk/gnorm2_old; 665 beta = PetscMax(beta, 0.0); 666 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 667 } 668 break; 669 670 case CG_DaiYuan: 671 /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property." 672 SIAM Journal on optimization 10, no. 1 (1999): 177-182. */ 673 if (!cg->diag_scaling){ 674 ierr = VecDot(tao->stepdirection, tao->gradient, &gd);CHKERRQ(ierr); 675 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 676 ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha);CHKERRQ(ierr); 677 beta = tau_k*gnorm2/(gd - gd_old); 678 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 679 } else { 680 ierr = MatMult(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr); 681 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 682 ierr = VecDot(cg->g_work, tao->gradient, >Dg);CHKERRQ(ierr); 683 ierr = VecDot(tao->stepdirection, cg->G_old, &gd_old);CHKERRQ(ierr); 684 ierr = VecDot(cg->d_work, cg->g_work, &dk_yk);CHKERRQ(ierr); 685 dk_yk = dk_yk - gd_old; 686 beta = gtDg/dk_yk; 687 ierr = VecScale(cg->d_work, beta);CHKERRQ(ierr); 688 ierr = VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work);CHKERRQ(ierr); 689 } 690 break; 691 692 case CG_HagerZhang: 693 /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent." 694 ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */ 695 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 696 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 697 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 698 snorm = dnorm*step; 699 cg->yts = step*dk_yk; 700 if (cg->use_dynamic_restart){ 701 ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr); 702 } 703 if (cg->dynamic_restart){ 704 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 705 } else { 706 if (!cg->diag_scaling){ 707 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 708 ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr); 709 /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */ 710 tmp = gd/dk_yk; 711 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)); 712 /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */ 713 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 714 /* d <- -t*g + beta*t*d */ 715 ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr); 716 } else { 717 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 718 cg->yty = ynorm2; 719 cg->sts = snorm*snorm; 720 /* Apply the diagonal scaling to all my vectors */ 721 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 722 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 723 ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr); 724 /* Construct the constant ytDgkp1 */ 725 ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr); 726 /* Construct the constant for scaling Dkyk in the update */ 727 tmp = gd/dk_yk; 728 ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr); 729 tau_k = -tau_k*gd/(dk_yk*dk_yk); 730 /* beta is the constant which adds the dk contribution */ 731 beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */ 732 /* From HZ2013, modified to account for diagonal scaling*/ 733 ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr); 734 ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr); 735 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 736 /* Do the update */ 737 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 738 } 739 } 740 break; 741 742 case CG_DaiKou: 743 /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search." 744 SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */ 745 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 746 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 747 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 748 snorm = step*dnorm; 749 cg->yts = dk_yk*step; 750 if (!cg->diag_scaling){ 751 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 752 ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr); 753 /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */ 754 tmp = gd/dk_yk; 755 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk; 756 beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm)); 757 /* d <- -t*g + beta*t*d */ 758 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk);CHKERRQ(ierr); 759 } else { 760 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 761 cg->yty = ynorm2; 762 cg->sts = snorm*snorm; 763 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 764 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 765 ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr); 766 /* Construct the constant ytDgkp1 */ 767 ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr); 768 ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr); 769 tau_k = tau_k*gd/(dk_yk*dk_yk); 770 tmp = gd/dk_yk; 771 /* beta is the constant which adds the dk contribution */ 772 beta = gkp1_yk/dk_yk - step*tmp - tau_k; 773 /* Update this for the last term in beta */ 774 ierr = VecDot(cg->y_work, tao->stepdirection, &dk_yk);CHKERRQ(ierr); 775 beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */ 776 ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr); 777 ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr); 778 beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm)); 779 /* Do the update */ 780 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 781 } 782 break; 783 784 case CG_KouDai: 785 /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization." 786 Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */ 787 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 788 ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr); 789 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 790 snorm = step*dnorm; 791 cg->yts = dk_yk*step; 792 if (cg->use_dynamic_restart){ 793 ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr); 794 } 795 if (cg->dynamic_restart){ 796 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 797 } else { 798 if (!cg->diag_scaling){ 799 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 800 ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr); 801 beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 802 if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) /* 0.1 is KD's zeta parameter */ 803 { 804 beta = cg->zeta*tau_k*gd/(dnorm*dnorm); 805 gamma = 0.0; 806 } else { 807 if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk; 808 /* This seems to be very effective when there's no tau_k scaling. 809 This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */ 810 else { 811 gamma = cg->xi*gd/dk_yk; 812 } 813 } 814 /* d <- -t*g + beta*t*d + t*tmp*yk */ 815 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr); 816 } else { 817 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 818 cg->yty = ynorm2; 819 cg->sts = snorm*snorm; 820 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 821 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 822 /* Construct the constant ytDgkp1 */ 823 ierr = VecDot(cg->yk, cg->g_work, &gkp1D_yk);CHKERRQ(ierr); 824 /* Construct the constant for scaling Dkyk in the update */ 825 gamma = gd/dk_yk; 826 /* tau_k = -ytDy/(ytd)^2 * gd */ 827 ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr); 828 tau_k = tau_k*gd/(dk_yk*dk_yk); 829 /* beta is the constant which adds the d_k contribution */ 830 beta = gkp1D_yk/dk_yk - step*gamma - tau_k; 831 /* Here is the requisite check */ 832 ierr = VecDot(tao->stepdirection, cg->g_work, &tmp);CHKERRQ(ierr); 833 if (cg->neg_xi){ 834 /* modified KD implementation */ 835 if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk; 836 else { 837 gamma = cg->xi*gd/dk_yk; 838 } 839 if (beta < cg->zeta*tmp/(dnorm*dnorm)){ 840 beta = cg->zeta*tmp/(dnorm*dnorm); 841 gamma = 0.0; 842 } 843 } else { /* original KD 2015 implementation */ 844 if (beta < cg->zeta*tmp/(dnorm*dnorm)) { 845 beta = cg->zeta*tmp/(dnorm*dnorm); 846 gamma = 0.0; 847 } else { 848 gamma = cg->xi*gd/dk_yk; 849 } 850 } 851 /* Do the update in two steps */ 852 ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr); 853 ierr = VecAXPY(tao->stepdirection, gamma, cg->y_work);CHKERRQ(ierr); 854 } 855 } 856 break; 857 858 case CG_SSML_BFGS: 859 /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory." 860 Discussion Papers 269 (1977). */ 861 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 862 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 863 snorm = step*dnorm; 864 cg->yts = dk_yk*step; 865 cg->yty = ynorm2; 866 cg->sts = snorm*snorm; 867 if (!cg->diag_scaling){ 868 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 869 ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr); 870 tmp = gd/dk_yk; 871 beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp; 872 /* d <- -t*g + beta*t*d + t*tmp*yk */ 873 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr); 874 } else { 875 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 876 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 877 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 878 /* compute scalar gamma */ 879 ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr); 880 ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr); 881 gamma = gd/dk_yk; 882 /* Compute scalar beta */ 883 beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 884 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 885 ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr); 886 } 887 break; 888 889 case CG_SSML_DFP: 890 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 891 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 892 snorm = step*dnorm; 893 cg->yts = dk_yk*step; 894 cg->yty = ynorm2; 895 cg->sts = snorm*snorm; 896 if (!cg->diag_scaling){ 897 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 898 ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr); 899 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 900 tau_k = cg->dfp_scale*tau_k; 901 tmp = tau_k*gkp1_yk/cg->yty; 902 beta = -step*gd/dk_yk; 903 /* d <- -t*g + beta*d + tmp*yk */ 904 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr); 905 } else { 906 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */ 907 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 908 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 909 /* compute scalar gamma */ 910 ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr); 911 ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr); 912 gamma = (gkp1_yk/tmp); 913 /* Compute scalar beta */ 914 beta = -step*gd/dk_yk; 915 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 916 ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr); 917 } 918 break; 919 920 case CG_SSML_BROYDEN: 921 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 922 ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr); 923 snorm = step*dnorm; 924 cg->yts = step*dk_yk; 925 cg->yty = ynorm2; 926 cg->sts = snorm*snorm; 927 if (!cg->diag_scaling){ 928 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 929 ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale);CHKERRQ(ierr); 930 ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale);CHKERRQ(ierr); 931 ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr); 932 tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp; 933 /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0, 934 it should reproduce the tau_dfp scaling. Same with dfp_scale. */ 935 tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/cg->yty; 936 beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*gd/dk_yk; 937 /* d <- -t*g + beta*d + tmp*yk */ 938 ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr); 939 } else { 940 /* We have diagonal scaling enabled */ 941 ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr); 942 ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr); 943 /* compute scalar gamma */ 944 ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr); 945 ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr); 946 gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp); 947 /* Compute scalar beta */ 948 beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk; 949 /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 950 ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr); 951 } 952 break; 953 954 default: 955 break; 956 957 } 958 } 959 PetscFunctionReturn(0); 960 } 961 962 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm) 963 { 964 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 965 PetscErrorCode ierr; 966 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 967 PetscReal step=1.0,gnorm2,gd,dnorm=0.0; 968 PetscReal gnorm2_old,f_old,resnorm, gnorm_old; 969 PetscBool pcgd_fallback = PETSC_FALSE; 970 971 PetscFunctionBegin; 972 /* We are now going to perform a line search along the direction. */ 973 /* Store solution and gradient info before it changes */ 974 ierr = VecCopy(tao->solution, cg->X_old);CHKERRQ(ierr); 975 ierr = VecCopy(tao->gradient, cg->G_old);CHKERRQ(ierr); 976 ierr = VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old);CHKERRQ(ierr); 977 978 gnorm_old = gnorm; 979 gnorm2_old = gnorm_old*gnorm_old; 980 f_old = cg->f; 981 /* Perform bounded line search. If we are recycling a solution from a previous */ 982 /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */ 983 if (!(cg->recycle && 0 == tao->niter)){ 984 /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */ 985 ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr); 986 ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr); 987 ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); 988 989 /* Check linesearch failure */ 990 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 991 ++cg->ls_fails; 992 if (cg->cg_type == CG_GradientDescent){ 993 /* Nothing left to do but fail out of the optimization */ 994 step = 0.0; 995 tao->reason = TAO_DIVERGED_LS_FAILURE; 996 } else { 997 /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */ 998 ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr); 999 ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr); 1000 ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr); 1001 gnorm = gnorm_old; 1002 gnorm2 = gnorm2_old; 1003 cg->f = f_old; 1004 1005 /* Fall back on preconditioned CG (so long as you're not already using it) */ 1006 if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling){ 1007 pcgd_fallback = PETSC_TRUE; 1008 ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr); 1009 1010 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 1011 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 1012 1013 ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr); 1014 ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr); 1015 ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); 1016 1017 pcgd_fallback = PETSC_FALSE; 1018 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){ 1019 /* Going to perform a regular gradient descent step. */ 1020 ++cg->ls_fails; 1021 step = 0.0; 1022 } 1023 } 1024 /* Fall back on the scaled gradient step */ 1025 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){ 1026 ++cg->ls_fails; 1027 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 1028 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 1029 ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr); 1030 ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr); 1031 ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); 1032 } 1033 1034 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){ 1035 /* Nothing left to do but fail out of the optimization */ 1036 ++cg->ls_fails; 1037 step = 0.0; 1038 tao->reason = TAO_DIVERGED_LS_FAILURE; 1039 } else { 1040 /* One of the fallbacks worked. Set them both back equal to false. */ 1041 pcgd_fallback = PETSC_FALSE; 1042 } 1043 } 1044 } 1045 /* Convergence test for line search failure */ 1046 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1047 1048 /* Standard convergence test */ 1049 ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr); 1050 ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr); 1051 if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 1052 ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr); 1053 ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr); 1054 ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr); 1055 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 1056 } 1057 /* Assert we have an updated step and we need at least one more iteration. */ 1058 /* Calculate the next direction */ 1059 /* Estimate the active set at the new solution */ 1060 ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr); 1061 /* Compute the projected gradient and its norm */ 1062 ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr); 1063 ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr); 1064 ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr); 1065 gnorm2 = gnorm*gnorm; 1066 1067 /* Calculate some quantities used in the StepDirectionUpdate. */ 1068 ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr); 1069 /* Update the step direction. */ 1070 ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr); 1071 ++tao->niter; 1072 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 1073 1074 if (cg->cg_type != CG_GradientDescent) { 1075 /* Figure out which previously active variables became inactive this iteration */ 1076 ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr); 1077 if (cg->inactive_idx && cg->inactive_old) { 1078 ierr = ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives);CHKERRQ(ierr); 1079 } 1080 /* Selectively reset the CG step those freshly inactive variables */ 1081 if (cg->new_inactives) { 1082 ierr = VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr); 1083 ierr = VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr); 1084 ierr = VecCopy(cg->inactive_grad, cg->inactive_step);CHKERRQ(ierr); 1085 ierr = VecScale(cg->inactive_step, -1.0);CHKERRQ(ierr); 1086 ierr = VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr); 1087 ierr = VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr); 1088 } 1089 /* Verify that this is a descent direction */ 1090 ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr); 1091 ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr); 1092 if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) { 1093 /* Not a descent direction, so we reset back to projected gradient descent */ 1094 ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr); 1095 ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr); 1096 ++cg->descent_error; 1097 } else { 1098 } 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0) 1104 { 1105 TAO_BNCG *cg = (TAO_BNCG*)tao->data; 1106 PetscErrorCode ierr; 1107 1108 PetscFunctionBegin; 1109 ierr = PetscObjectReference((PetscObject)H0);CHKERRQ(ierr); 1110 cg->pc = H0; 1111 PetscFunctionReturn(0); 1112 } 1113