1 #include "bmrm.h" 2 3 static PetscErrorCode init_df_solver(TAO_DF*); 4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*); 5 static PetscErrorCode destroy_df_solver(TAO_DF*); 6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*, 7 PetscReal,PetscReal*,PetscReal*,PetscReal*); 8 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*, 9 PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*); 10 11 static PetscErrorCode solve(TAO_DF*); 12 13 14 /*------------------------------------------------------------*/ 15 /* The main solver function 16 17 f = Remp(W) This is what the user provides us from the application layer 18 So the ComputeGradient function for instance should get us back the subgradient of Remp(W) 19 20 Regularizer assumed to be L2 norm = lambda*0.5*W'W () 21 */ 22 23 #undef __FUNCT__ 24 #define __FUNCT__ "make_grad_node" 25 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p) 26 { 27 PetscErrorCode ierr; 28 29 PetscFunctionBegin; 30 31 ierr = PetscMalloc(sizeof(Vec_Chain), p); CHKERRQ(ierr); 32 ierr = VecDuplicate(X, &(*p)->V); CHKERRQ(ierr); 33 ierr = VecCopy(X, (*p)->V); CHKERRQ(ierr); 34 (*p)->next = PETSC_NULL; 35 36 PetscFunctionReturn(0); 37 } 38 39 40 #undef __FUNCT__ 41 #define __FUNCT__ "destroy_grad_list" 42 static PetscErrorCode destroy_grad_list(Vec_Chain *head) 43 { 44 PetscErrorCode ierr; 45 Vec_Chain *p = head->next, *q; 46 47 PetscFunctionBegin; 48 while(p) { 49 q = p->next; 50 ierr = VecDestroy(&p->V); CHKERRQ(ierr); 51 ierr = PetscFree(p); CHKERRQ(ierr); 52 p = q; 53 } 54 head->next = PETSC_NULL; 55 56 PetscFunctionReturn(0); 57 } 58 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "TaoSolve_BMRM" 62 static PetscErrorCode TaoSolve_BMRM(TaoSolver tao) 63 { 64 PetscErrorCode ierr; 65 TaoSolverTerminationReason reason; 66 TAO_DF df; 67 TAO_BMRM *bmrm = (TAO_BMRM*)tao->data; 68 69 /* Values and pointers to parts of the optimization problem */ 70 PetscReal f = 0.0; 71 Vec W = tao->solution; 72 Vec G = tao->gradient; 73 PetscReal lambda; 74 75 PetscReal bt; 76 Vec_Chain grad_list, *tail_glist, *pgrad; 77 78 PetscInt iter = 0; 79 PetscInt i; 80 PetscMPIInt rank; 81 82 /* Used in termination criteria check */ 83 PetscReal reg; 84 PetscReal jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw; 85 PetscReal innerSolverTol; 86 87 PetscFunctionBegin; 88 89 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank); CHKERRQ(ierr); 90 lambda = bmrm->lambda; 91 92 /* Check Stopping Condition */ 93 tao->step = 1.0; 94 max_jtwt = -BMRM_INFTY; 95 min_jw = BMRM_INFTY; 96 innerSolverTol = 1.0; 97 epsilon = 0.0; 98 99 if (rank == 0) { 100 ierr = init_df_solver(&df); CHKERRQ(ierr); 101 grad_list.next = NULL; 102 tail_glist = &grad_list; 103 } 104 105 df.tol = 1e-6; 106 reason = TAO_CONTINUE_ITERATING; 107 108 /*-----------------Algorithm Begins------------------------*/ 109 /* make the scatter */ 110 ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w); CHKERRQ(ierr); 111 ierr = VecAssemblyBegin(bmrm->local_w); CHKERRQ(ierr); 112 ierr = VecAssemblyEnd(bmrm->local_w); CHKERRQ(ierr); 113 114 /* NOTE: In application pass the sub-gradient of Remp(W) */ 115 ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G); CHKERRQ(ierr); 116 ierr = TaoMonitor(tao,iter,f,1.0,0.0,tao->step,&reason); CHKERRQ(ierr); 117 while (reason == TAO_CONTINUE_ITERATING) { 118 /* compute bt = Remp(Wt-1) - <Wt-1, At> */ 119 ierr = VecDot(W, G, &bt); CHKERRQ(ierr); 120 bt = f - bt; 121 122 /* First gather the gradient to the master node */ 123 ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr); 124 ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr); 125 126 /* Bring up the inner solver */ 127 if (rank == 0) { 128 ierr = ensure_df_space(iter+1, &df); CHKERRQ(ierr); 129 ierr = make_grad_node(bmrm->local_w, &pgrad); CHKERRQ(ierr); 130 tail_glist->next = pgrad; 131 tail_glist = pgrad; 132 133 df.a[iter] = 1.0; 134 df.f[iter] = -bt; 135 df.u[iter] = 1.0; 136 df.l[iter] = 0.0; 137 138 /* set up the Q */ 139 pgrad = grad_list.next; 140 for (i=0; i<=iter; i++) { 141 ierr = VecDot(pgrad->V, bmrm->local_w, ®); CHKERRQ(ierr); 142 df.Q[i][iter] = df.Q[iter][i] = reg / lambda; 143 pgrad = pgrad->next; 144 } 145 146 if (iter > 0) { 147 df.x[iter] = 0.0; 148 ierr = solve(&df); CHKERRQ(ierr); 149 } 150 else 151 df.x[0] = 1.0; 152 153 /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */ 154 jtwt = 0.0; 155 ierr = VecSet(bmrm->local_w, 0.0); CHKERRQ(ierr); 156 pgrad = grad_list.next; 157 for (i=0; i<=iter; i++) { 158 jtwt -= df.x[i] * df.f[i]; 159 ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V); CHKERRQ(ierr); 160 pgrad = pgrad->next; 161 } 162 163 ierr = VecNorm(bmrm->local_w, NORM_2, ®); CHKERRQ(ierr); 164 reg = 0.5*lambda*reg*reg; 165 jtwt -= reg; 166 } /* end if rank == 0 */ 167 168 /* scatter the new W to all nodes */ 169 ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE); CHKERRQ(ierr); 170 ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE); CHKERRQ(ierr); 171 172 ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G); CHKERRQ(ierr); 173 174 MPI_Bcast(&jtwt,1,MPI_DOUBLE,0,PETSC_COMM_WORLD); 175 MPI_Bcast(®,1,MPI_DOUBLE,0,PETSC_COMM_WORLD); 176 177 jw = reg + f; /* J(w) = regularizer + Remp(w) */ 178 if (jw < min_jw) 179 min_jw = jw; 180 if (jtwt > max_jtwt) 181 max_jtwt = jtwt; 182 183 pre_epsilon = epsilon; 184 epsilon = min_jw - jtwt; 185 186 if (rank == 0) { 187 if (innerSolverTol > epsilon) 188 innerSolverTol = epsilon; 189 else if (innerSolverTol < 1e-7) 190 innerSolverTol = 1e-7; 191 192 /* if the annealing doesn't work well, lower the inner solver tolerance */ 193 if(pre_epsilon < epsilon) 194 innerSolverTol *= 0.2; 195 196 df.tol = innerSolverTol*0.5; 197 } 198 199 iter++; 200 ierr = TaoMonitor(tao,iter,min_jw,epsilon,0.0,tao->step,&reason); CHKERRQ(ierr); 201 } 202 203 /* free all the memory */ 204 if (rank == 0) { 205 ierr = destroy_grad_list(&grad_list); CHKERRQ(ierr); 206 ierr = destroy_df_solver(&df); CHKERRQ(ierr); 207 } 208 209 ierr = VecDestroy(&bmrm->local_w); CHKERRQ(ierr); 210 ierr = VecScatterDestroy(&bmrm->scatter); CHKERRQ(ierr); 211 212 PetscFunctionReturn(0); 213 } 214 215 216 /* ---------------------------------------------------------- */ 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "TaoSetup_BMRM" 220 static PetscErrorCode TaoSetup_BMRM(TaoSolver tao) { 221 222 PetscErrorCode ierr; 223 224 PetscFunctionBegin; 225 226 /* Allocate some arrays */ 227 if (!tao->gradient) { 228 ierr = VecDuplicate(tao->solution, &tao->gradient); CHKERRQ(ierr); 229 } 230 231 PetscFunctionReturn(0); 232 } 233 234 235 /*------------------------------------------------------------*/ 236 237 #undef __FUNCT__ 238 #define __FUNCT__ "TaoDestroy_BMRM" 239 static PetscErrorCode TaoDestroy_BMRM(TaoSolver tao) 240 { 241 PetscErrorCode ierr; 242 243 /* Free allocated memory in custom BMRM structure */ 244 PetscFunctionBegin; 245 ierr = PetscFree(tao->data); CHKERRQ(ierr); 246 tao->data = PETSC_NULL; 247 248 PetscFunctionReturn(0); 249 } 250 251 252 #undef __FUNCT__ 253 #define __FUNCT__ "TaoSetFromOptions_BMRM" 254 static PetscErrorCode TaoSetFromOptions_BMRM(TaoSolver tao) 255 { 256 PetscErrorCode ierr; 257 TAO_BMRM* bmrm = (TAO_BMRM*)tao->data; 258 PetscBool flg; 259 260 PetscFunctionBegin; 261 ierr = PetscOptionsHead("BMRM for regularized risk minimization");CHKERRQ(ierr); 262 ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,&flg); CHKERRQ(ierr); 263 ierr = PetscOptionsTail();CHKERRQ(ierr); 264 PetscFunctionReturn(0); 265 } 266 267 268 /*------------------------------------------------------------*/ 269 #undef __FUNCT__ 270 #define __FUNCT__ "TaoView_BMRM" 271 static PetscErrorCode TaoView_BMRM(TaoSolver tao, PetscViewer viewer) 272 { 273 PetscBool isascii; 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr); 278 if (isascii) { 279 ierr = PetscViewerASCIIPushTab(viewer); CHKERRQ(ierr); 280 ierr = PetscViewerASCIIPopTab(viewer); CHKERRQ(ierr); 281 } 282 else{ 283 SETERRQ1(((PetscObject)tao)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for TAO BMRM",((PetscObject)viewer)->type_name); 284 } 285 286 PetscFunctionReturn(0); 287 } 288 289 290 /*------------------------------------------------------------*/ 291 EXTERN_C_BEGIN 292 #undef __FUNCT__ 293 #define __FUNCT__ "TaoCreate_BMRM" 294 PetscErrorCode TaoCreate_BMRM(TaoSolver tao) 295 { 296 TAO_BMRM *bmrm; 297 PetscErrorCode ierr; 298 299 PetscFunctionBegin; 300 tao->ops->setup = TaoSetup_BMRM; 301 tao->ops->solve = TaoSolve_BMRM; 302 tao->ops->view = TaoView_BMRM; 303 tao->ops->setfromoptions = TaoSetFromOptions_BMRM; 304 tao->ops->destroy = TaoDestroy_BMRM; 305 306 ierr = PetscNewLog(tao,&bmrm); CHKERRQ(ierr); 307 bmrm->lambda = 1.0; 308 tao->data = (void*)bmrm; 309 310 /* Note: May need to be tuned! */ 311 tao->max_it = 2048; 312 tao->max_funcs = 300000; 313 tao->fatol = 1e-20; 314 tao->frtol = 1e-25; 315 tao->gatol = 1e-25; 316 tao->grtol = 1e-25; 317 318 PetscFunctionReturn(0); 319 } 320 EXTERN_C_END 321 322 323 324 #undef __FUNCT__ 325 #define __FUNCT__ "init_df_solver" 326 PetscErrorCode init_df_solver(TAO_DF *df) 327 { 328 PetscInt i, n = INCRE_DIM; 329 PetscErrorCode ierr; 330 331 PetscFunctionBegin; 332 333 /* default values */ 334 df->maxProjIter = 200; 335 df->maxPGMIter = 300000; 336 df->b = 1.0; 337 338 /* memory space required by Dai-Fletcher */ 339 df->cur_num_cp = n; 340 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->f); CHKERRQ(ierr); 341 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->a); CHKERRQ(ierr); 342 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->l); CHKERRQ(ierr); 343 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->u); CHKERRQ(ierr); 344 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->x); CHKERRQ(ierr); 345 ierr = PetscMalloc(sizeof(PetscReal*)*n, &df->Q); CHKERRQ(ierr); 346 347 for (i = 0; i < n; i ++) { 348 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Q[i]); CHKERRQ(ierr); 349 } 350 351 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->g); CHKERRQ(ierr); 352 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->y); CHKERRQ(ierr); 353 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tempv); CHKERRQ(ierr); 354 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->d); CHKERRQ(ierr); 355 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Qd); CHKERRQ(ierr); 356 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->t); CHKERRQ(ierr); 357 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->xplus); CHKERRQ(ierr); 358 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tplus); CHKERRQ(ierr); 359 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->sk); CHKERRQ(ierr); 360 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->yk); CHKERRQ(ierr); 361 362 ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt); CHKERRQ(ierr); 363 ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt2); CHKERRQ(ierr); 364 ierr = PetscMalloc(sizeof(PetscInt)*n, &df->uv); CHKERRQ(ierr); 365 366 PetscFunctionReturn(0); 367 } 368 369 370 #undef __FUNCT__ 371 #define __FUNCT__ "ensure_df_space" 372 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df) 373 { 374 PetscErrorCode ierr; 375 PetscReal *tmp, **tmp_Q; 376 PetscInt i, n, old_n; 377 378 df->dim = dim; 379 if (dim <= df->cur_num_cp) 380 return 0; 381 382 PetscFunctionBegin; 383 384 old_n = df->cur_num_cp; 385 df->cur_num_cp += INCRE_DIM; 386 n = df->cur_num_cp; 387 388 /* memory space required by dai-fletcher */ 389 ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp); CHKERRQ(ierr); 390 ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n); CHKERRQ(ierr); 391 ierr = PetscFree(df->f); CHKERRQ(ierr); 392 df->f = tmp; 393 394 ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp); CHKERRQ(ierr); 395 ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n); CHKERRQ(ierr); 396 ierr = PetscFree(df->a); CHKERRQ(ierr); 397 df->a = tmp; 398 399 ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp); CHKERRQ(ierr); 400 ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n); CHKERRQ(ierr); 401 ierr = PetscFree(df->l); CHKERRQ(ierr); 402 df->l = tmp; 403 404 ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp); CHKERRQ(ierr); 405 ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n); CHKERRQ(ierr); 406 ierr = PetscFree(df->u); CHKERRQ(ierr); 407 df->u = tmp; 408 409 ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp); CHKERRQ(ierr); 410 ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n); CHKERRQ(ierr); 411 ierr = PetscFree(df->x); CHKERRQ(ierr); 412 df->x = tmp; 413 414 ierr = PetscMalloc(sizeof(PetscReal*)*n, &tmp_Q); CHKERRQ(ierr); 415 for (i = 0; i < n; i ++) 416 { 417 ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp_Q[i]); CHKERRQ(ierr); 418 if (i < old_n) 419 { 420 ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n); CHKERRQ(ierr); 421 ierr = PetscFree(df->Q[i]); CHKERRQ(ierr); 422 } 423 } 424 425 ierr = PetscFree(df->Q); CHKERRQ(ierr); 426 df->Q = tmp_Q; 427 428 ierr = PetscFree(df->g); CHKERRQ(ierr); 429 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->g); CHKERRQ(ierr); 430 431 ierr = PetscFree(df->y); CHKERRQ(ierr); 432 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->y); CHKERRQ(ierr); 433 434 ierr = PetscFree(df->tempv); CHKERRQ(ierr); 435 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tempv); CHKERRQ(ierr); 436 437 ierr = PetscFree(df->d); CHKERRQ(ierr); 438 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->d); CHKERRQ(ierr); 439 440 ierr = PetscFree(df->Qd); CHKERRQ(ierr); 441 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Qd); CHKERRQ(ierr); 442 443 ierr = PetscFree(df->t); CHKERRQ(ierr); 444 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->t); CHKERRQ(ierr); 445 446 ierr = PetscFree(df->xplus); CHKERRQ(ierr); 447 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->xplus); CHKERRQ(ierr); 448 449 ierr = PetscFree(df->tplus); CHKERRQ(ierr); 450 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tplus); CHKERRQ(ierr); 451 452 ierr = PetscFree(df->sk); CHKERRQ(ierr); 453 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->sk); CHKERRQ(ierr); 454 455 ierr = PetscFree(df->yk); CHKERRQ(ierr); 456 ierr = PetscMalloc(sizeof(PetscReal)*n, &df->yk); CHKERRQ(ierr); 457 458 ierr = PetscFree(df->ipt); CHKERRQ(ierr); 459 ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt); CHKERRQ(ierr); 460 461 ierr = PetscFree(df->ipt2); CHKERRQ(ierr); 462 ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt2); CHKERRQ(ierr); 463 464 ierr = PetscFree(df->uv); CHKERRQ(ierr); 465 ierr = PetscMalloc(sizeof(PetscInt)*n, &df->uv); CHKERRQ(ierr); 466 467 PetscFunctionReturn(0); 468 } 469 470 471 #undef __FUNCT__ 472 #define __FUNCT__ "destroy_df_solver" 473 PetscErrorCode destroy_df_solver(TAO_DF *df) 474 { 475 PetscErrorCode ierr; 476 PetscInt i; 477 PetscFunctionBegin; 478 479 if (df->f) { 480 ierr = PetscFree(df->f); CHKERRQ(ierr); df->f = PETSC_NULL; 481 } 482 483 if (df->a) { 484 ierr = PetscFree(df->a); CHKERRQ(ierr); df->a = PETSC_NULL; 485 } 486 487 if (df->l) { 488 ierr = PetscFree(df->l); CHKERRQ(ierr); df->l = PETSC_NULL; 489 } 490 491 if (df->u) { 492 ierr = PetscFree(df->u); CHKERRQ(ierr); df->u = PETSC_NULL; 493 } 494 495 if (df->x) { 496 ierr = PetscFree(df->x); CHKERRQ(ierr); df->x = PETSC_NULL; 497 } 498 499 for (i = 0; i < df->cur_num_cp; i ++) 500 { 501 ierr = PetscFree(df->Q[i]); CHKERRQ(ierr); 502 } 503 ierr = PetscFree(df->Q); CHKERRQ(ierr); 504 505 506 if (df->ipt) { 507 ierr = PetscFree(df->ipt); CHKERRQ(ierr); df->ipt = PETSC_NULL; 508 } 509 if (df->ipt2) { 510 ierr = PetscFree(df->ipt2); CHKERRQ(ierr); df->ipt2 = PETSC_NULL; 511 } 512 if (df->uv) { 513 ierr = PetscFree(df->uv); CHKERRQ(ierr); df->uv = PETSC_NULL; 514 } 515 if (df->g) { 516 ierr = PetscFree(df->g); CHKERRQ(ierr); df->g = PETSC_NULL; 517 } 518 if (df->y) { 519 ierr = PetscFree(df->y); CHKERRQ(ierr); df->y = PETSC_NULL; 520 } 521 if (df->tempv) { 522 ierr = PetscFree(df->tempv); CHKERRQ(ierr); df->tempv = PETSC_NULL; 523 } 524 if (df->d) { 525 ierr = PetscFree(df->d); CHKERRQ(ierr); df->d = PETSC_NULL; 526 } 527 if (df->Qd) { 528 ierr = PetscFree(df->Qd); CHKERRQ(ierr); df->Qd = PETSC_NULL; 529 } 530 if (df->t) { 531 ierr = PetscFree(df->t); CHKERRQ(ierr); df->t = PETSC_NULL; 532 } 533 if (df->xplus) { 534 ierr = PetscFree(df->xplus); CHKERRQ(ierr); df->xplus = PETSC_NULL; 535 } 536 if (df->tplus) { 537 ierr = PetscFree(df->tplus); CHKERRQ(ierr); df->tplus = PETSC_NULL; 538 } 539 if (df->sk) { 540 ierr = PetscFree(df->sk); CHKERRQ(ierr); df->sk = PETSC_NULL; 541 } 542 if (df->yk) { 543 ierr = PetscFree(df->yk); CHKERRQ(ierr); df->yk = PETSC_NULL; 544 } 545 546 PetscFunctionReturn(0); 547 } 548 549 550 /* Piecewise linear monotone target function for the Dai-Fletcher projector */ 551 #undef __FUNCT__ 552 #define __FUNCT__ "phi" 553 PetscReal phi(PetscReal *x, 554 PetscInt n, 555 PetscReal lambda, 556 PetscReal *a, 557 PetscReal b, 558 PetscReal *c, 559 PetscReal *l, 560 PetscReal *u) 561 { 562 PetscReal r = 0.0; 563 PetscInt i; 564 565 for (i = 0; i < n; i++){ 566 x[i] = -c[i] + lambda*a[i]; 567 if (x[i] > u[i]) 568 x[i] = u[i]; 569 else if(x[i] < l[i]) 570 x[i] = l[i]; 571 r += a[i]*x[i]; 572 } 573 return r - b; 574 } 575 576 577 578 /** Modified Dai-Fletcher QP projector solves the problem: 579 * 580 * minimise 0.5*x'*x - c'*x 581 * subj to a'*x = b 582 * l \leq x \leq u 583 * 584 * \param c The point to be projected onto feasible set 585 */ 586 #undef __FUNCT__ 587 #define __FUNCT__ "project" 588 PetscInt project(PetscInt n, 589 PetscReal *a, 590 PetscReal b, 591 PetscReal *c, 592 PetscReal *l, 593 PetscReal *u, 594 PetscReal *x, 595 PetscReal *lam_ext, 596 TAO_DF *df) 597 { 598 PetscReal lambda, lambdal, lambdau, dlambda, lambda_new; 599 PetscReal r, rl, ru, s; 600 PetscInt innerIter; 601 PetscBool nonNegativeSlack = PETSC_FALSE; 602 /* PetscBool nonNegativeSlack = PETSC_TRUE; */ 603 604 *lam_ext = 0; 605 lambda = 0; 606 dlambda = 0.5; 607 innerIter = 1; 608 609 /* \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b) 610 * 611 * Optimality conditions for \phi: 612 * 613 * 1. lambda <= 0 614 * 2. r <= 0 615 * 3. r*lambda == 0 616 */ 617 618 /* Bracketing Phase */ 619 r = phi(x, n, lambda, a, b, c, l, u); 620 621 if(nonNegativeSlack) 622 { 623 /* inequality constraint, i.e., with \xi >= 0 constraint */ 624 if (r < TOL_R) 625 return 0; 626 } 627 else 628 { 629 /* equality constraint ,i.e., without \xi >= 0 constraint */ 630 if (fabs(r) < TOL_R) 631 return 0; 632 } 633 634 if (r < 0.0){ 635 lambdal = lambda; 636 rl = r; 637 lambda = lambda + dlambda; 638 r = phi(x, n, lambda, a, b, c, l, u); 639 while (r < 0.0 && dlambda < BMRM_INFTY) { 640 lambdal = lambda; 641 s = rl/r - 1.0; 642 if (s < 0.1) s = 0.1; 643 dlambda = dlambda + dlambda/s; 644 lambda = lambda + dlambda; 645 rl = r; 646 r = phi(x, n, lambda, a, b, c, l, u); 647 } 648 lambdau = lambda; 649 ru = r; 650 } 651 else { 652 lambdau = lambda; 653 ru = r; 654 lambda = lambda - dlambda; 655 r = phi(x, n, lambda, a, b, c, l, u); 656 while (r > 0.0 && dlambda > -BMRM_INFTY) { 657 lambdau = lambda; 658 s = ru/r - 1.0; 659 if (s < 0.1) s = 0.1; 660 dlambda = dlambda + dlambda/s; 661 lambda = lambda - dlambda; 662 ru = r; 663 r = phi(x, n, lambda, a, b, c, l, u); 664 } 665 lambdal = lambda; 666 rl = r; 667 } 668 669 if(fabs(dlambda) > BMRM_INFTY) { 670 PetscPrintf(PETSC_COMM_SELF, "ERROR: L2N2_DaiFletcherPGM detected Infeasible QP problem!\n"); 671 exit(0); 672 } 673 674 if(ru == 0){ 675 lambda = lambdau; 676 r = phi(x, n, lambda, a, b, c, l, u); 677 return innerIter; 678 } 679 680 /* Secant Phase */ 681 s = 1.0 - rl/ru; 682 dlambda = dlambda/s; 683 lambda = lambdau - dlambda; 684 r = phi(x, n, lambda, a, b, c, l, u); 685 686 while (fabs(r) > TOL_R 687 && dlambda > TOL_LAM * (1.0 + fabs(lambda)) 688 && innerIter < df->maxProjIter){ 689 innerIter++; 690 if (r > 0.0){ 691 if (s <= 2.0){ 692 lambdau = lambda; 693 ru = r; 694 s = 1.0 - rl/ru; 695 dlambda = (lambdau - lambdal) / s; 696 lambda = lambdau - dlambda; 697 } 698 else { 699 s = ru/r-1.0; 700 if (s < 0.1) s = 0.1; 701 dlambda = (lambdau - lambda) / s; 702 lambda_new = 0.75*lambdal + 0.25*lambda; 703 if (lambda_new < (lambda - dlambda)) 704 lambda_new = lambda - dlambda; 705 lambdau = lambda; 706 ru = r; 707 lambda = lambda_new; 708 s = (lambdau - lambdal) / (lambdau - lambda); 709 } 710 } 711 else { 712 if (s >= 2.0){ 713 lambdal = lambda; 714 rl = r; 715 s = 1.0 - rl/ru; 716 dlambda = (lambdau - lambdal) / s; 717 lambda = lambdau - dlambda; 718 } 719 else { 720 s = rl/r - 1.0; 721 if (s < 0.1) s = 0.1; 722 dlambda = (lambda-lambdal) / s; 723 lambda_new = 0.75*lambdau + 0.25*lambda; 724 if (lambda_new > (lambda + dlambda)) 725 lambda_new = lambda + dlambda; 726 lambdal = lambda; 727 rl = r; 728 lambda = lambda_new; 729 s = (lambdau - lambdal) / (lambdau-lambda); 730 } 731 } 732 r = phi(x, n, lambda, a, b, c, l, u); 733 } 734 735 *lam_ext = lambda; 736 if(innerIter >= df->maxProjIter) 737 PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n"); 738 739 return innerIter; 740 } 741 742 743 #undef __FUNCT__ 744 #define __FUNCT__ "solve" 745 PetscErrorCode solve(TAO_DF *df) 746 { 747 PetscErrorCode ierr; 748 PetscInt i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0; 749 PetscReal gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext; 750 PetscReal DELTAsv, ProdDELTAsv; 751 PetscReal c, *tempQ; 752 PetscReal *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol; 753 PetscReal *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd; 754 PetscReal *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk; 755 PetscReal **Q = df->Q, *f = df->f, *t = df->t; 756 PetscInt dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv; 757 758 /*** variables for the adaptive nonmonotone linesearch ***/ 759 PetscInt L, llast; 760 PetscReal fr, fbest, fv, fc, fv0; 761 c = BMRM_INFTY; 762 763 DELTAsv = EPS_SV; 764 if (tol <= 1.0e-5 || dim <= 20) 765 ProdDELTAsv = 0.0F; 766 else 767 ProdDELTAsv = EPS_SV; 768 769 for (i = 0; i < dim; i++) 770 tempv[i] = -x[i]; 771 772 lam_ext = 0.0; 773 774 /* Project the initial solution */ 775 projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df); 776 777 /* Compute gradient 778 g = Q*x + f; */ 779 780 it = 0; 781 for (i = 0; i < dim; i++) 782 if (fabs(x[i]) > ProdDELTAsv) 783 ipt[it++] = i; 784 785 ierr = PetscMemzero(t, dim*sizeof(PetscReal)); CHKERRQ(ierr); 786 for (i = 0; i < it; i++){ 787 tempQ = Q[ipt[i]]; 788 for (j = 0; j < dim; j++) 789 t[j] += (tempQ[j]*x[ipt[i]]); 790 } 791 for (i = 0; i < dim; i++){ 792 g[i] = t[i] + f[i]; 793 } 794 795 796 /* y = -(x_{k} - g_{k}) */ 797 for (i = 0; i < dim; i++){ 798 y[i] = g[i] - x[i]; 799 } 800 801 /* Project x_{k} - g_{k} */ 802 projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df); 803 804 /* y = P(x_{k} - g_{k}) - x_{k} */ 805 max = ALPHA_MIN; 806 for (i = 0; i < dim; i++){ 807 y[i] = tempv[i] - x[i]; 808 if (fabs(y[i]) > max) 809 max = fabs(y[i]); 810 } 811 812 if (max < tol*1e-3){ 813 lscount = 0; 814 innerIter = 0; 815 return 0; 816 } 817 818 alpha = 1.0 / max; 819 820 /* fv0 = f(x_{0}). Recall t = Q x_{k} */ 821 fv0 = 0.0; 822 for (i = 0; i < dim; i++) 823 fv0 += x[i] * (0.5*t[i] + f[i]); 824 825 /*** adaptive nonmonotone linesearch ***/ 826 L = 2; 827 fr = ALPHA_MAX; 828 fbest = fv0; 829 fc = fv0; 830 llast = 0; 831 akold = bkold = 0.0; 832 833 /*** Iterator begins ***/ 834 for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) { 835 836 /* tempv = -(x_{k} - alpha*g_{k}) */ 837 for (i = 0; i < dim; i++) 838 tempv[i] = alpha*g[i] - x[i]; 839 840 /* Project x_{k} - alpha*g_{k} */ 841 projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df); 842 843 844 /* gd = \inner{d_{k}}{g_{k}} 845 d = P(x_{k} - alpha*g_{k}) - x_{k} 846 */ 847 gd = 0.0; 848 for (i = 0; i < dim; i++){ 849 d[i] = y[i] - x[i]; 850 gd += d[i] * g[i]; 851 } 852 853 /* Gradient computation */ 854 855 /* compute Qd = Q*d or Qd = Q*y - t depending on their sparsity */ 856 857 it = it2 = 0; 858 for (i = 0; i < dim; i++) 859 if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) 860 ipt[it++] = i; 861 for (i = 0; i < dim; i++) 862 if (fabs(y[i]) > ProdDELTAsv) 863 ipt2[it2++] = i; 864 865 ierr = PetscMemzero(Qd, dim*sizeof(PetscReal)); CHKERRQ(ierr); 866 /* compute Qd = Q*d */ 867 if (it < it2){ 868 for (i = 0; i < it; i++){ 869 tempQ = Q[ipt[i]]; 870 for (j = 0; j < dim; j++) 871 Qd[j] += (tempQ[j] * d[ipt[i]]); 872 } 873 } 874 else { /* compute Qd = Q*y-t */ 875 for (i = 0; i < it2; i++){ 876 tempQ = Q[ipt2[i]]; 877 for (j = 0; j < dim; j++) 878 Qd[j] += (tempQ[j] * y[ipt2[i]]); 879 } 880 for (j = 0; j < dim; j++) 881 Qd[j] -= t[j]; 882 } 883 884 885 /* ak = inner{d_{k}}{d_{k}} */ 886 ak = 0.0; 887 for (i = 0; i < dim; i++) 888 ak += d[i] * d[i]; 889 890 bk = 0.0; 891 for (i = 0; i < dim; i++) 892 bk += d[i]*Qd[i]; 893 894 if (bk > EPS*ak && gd < 0.0) /* ak is normd */ 895 lamnew = -gd/bk; 896 else 897 lamnew = 1.0; 898 899 /* fv is computing f(x_{k} + d_{k}) */ 900 fv = 0.0; 901 for (i = 0; i < dim; i++){ 902 xplus[i] = x[i] + d[i]; 903 tplus[i] = t[i] + Qd[i]; 904 fv += xplus[i] * (0.5*tplus[i] + f[i]); 905 } 906 907 /* fr is fref */ 908 if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){ 909 lscount++; 910 fv = 0.0; 911 for (i = 0; i < dim; i++){ 912 xplus[i] = x[i] + lamnew*d[i]; 913 tplus[i] = t[i] + lamnew*Qd[i]; 914 fv += xplus[i] * (0.5*tplus[i] + f[i]); 915 } 916 } 917 918 for (i = 0; i < dim; i++){ 919 sk[i] = xplus[i] - x[i]; 920 yk[i] = tplus[i] - t[i]; 921 x[i] = xplus[i]; 922 t[i] = tplus[i]; 923 g[i] = t[i] + f[i]; 924 } 925 926 927 /* update the line search control parameters */ 928 929 if (fv < fbest){ 930 fbest = fv; 931 fc = fv; 932 llast = 0; 933 } 934 else { 935 fc = (fc > fv ? fc : fv); 936 llast++; 937 if (llast == L){ 938 fr = fc; 939 fc = fv; 940 llast = 0; 941 } 942 } 943 944 ak = bk = 0.0; 945 for (i = 0; i < dim; i++){ 946 ak += sk[i] * sk[i]; 947 bk += sk[i] * yk[i]; 948 } 949 950 if (bk <= EPS*ak) 951 alpha = ALPHA_MAX; 952 else{ 953 if (bkold < EPS*akold) 954 alpha = ak/bk; 955 else 956 alpha = (akold+ak)/(bkold+bk); 957 958 if (alpha > ALPHA_MAX) 959 alpha = ALPHA_MAX; 960 else if (alpha < ALPHA_MIN) 961 alpha = ALPHA_MIN; 962 } 963 964 akold = ak; 965 bkold = bk; 966 967 968 /*** stopping criterion based on KKT conditions ***/ 969 /* at optimal, gradient of lagrangian w.r.t. x is zero */ 970 971 bk = 0.0; 972 for (i = 0; i < dim; i++) 973 bk += x[i] * x[i]; 974 975 if (sqrt(ak) < tol*10 * sqrt(bk)){ 976 it = 0; 977 luv = 0; 978 kktlam = 0.0; 979 for (i = 0; i < dim; i++){ 980 /* x[i] is active hence lagrange multipliers for box constraints 981 are zero. The lagrange multiplier for ineq. const. is then 982 defined as below 983 */ 984 if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){ 985 ipt[it++] = i; 986 kktlam = kktlam - a[i]*g[i]; 987 } 988 else 989 uv[luv++] = i; 990 } 991 992 if (it == 0 && sqrt(ak) < tol*0.5 * sqrt(bk)) 993 return 0; 994 else { 995 kktlam = kktlam/it; 996 info = 1; 997 for (i = 0; i < it; i++) { 998 if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) { 999 info = 0; 1000 break; 1001 } 1002 } 1003 if (info == 1) { 1004 for (i = 0; i < luv; i++) { 1005 if (x[uv[i]] <= DELTAsv){ 1006 /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may 1007 not be zero. So, the gradient without beta is > 0 1008 */ 1009 if (g[uv[i]] + kktlam*a[uv[i]] < -tol){ 1010 info = 0; 1011 break; 1012 } 1013 } 1014 else { 1015 /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may 1016 not be zero. So, the gradient without eta is < 0 1017 */ 1018 if (g[uv[i]] + kktlam*a[uv[i]] > tol) { 1019 info = 0; 1020 break; 1021 } 1022 } 1023 } 1024 } 1025 1026 if (info == 1) 1027 return 0; 1028 } 1029 } 1030 } 1031 return 0; 1032 } 1033 1034 1035