xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision 4fc747eaadbeca11629f314a99edccbc2ed7b3d3)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
18 */
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "make_grad_node"
22 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = PetscNew(p);CHKERRQ(ierr);
28   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
29   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
30   (*p)->next = NULL;
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "destroy_grad_list"
36 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
37 {
38   PetscErrorCode ierr;
39   Vec_Chain      *p = head->next, *q;
40 
41   PetscFunctionBegin;
42   while(p) {
43     q = p->next;
44     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
45     ierr = PetscFree(p);CHKERRQ(ierr);
46     p = q;
47   }
48   head->next = NULL;
49   PetscFunctionReturn(0);
50 }
51 
52 
#undef __FUNCT__
#define __FUNCT__ "TaoSolve_BMRM"
/*
  TaoSolve_BMRM - bundle method for regularized risk minimization.

  Per iteration:
    1. evaluate Remp(W) and a subgradient G on all ranks,
    2. gather G to rank 0, which appends the cutting plane with offset
       bt = Remp(W) - <W,G> to the bundle, updates the dense dual matrix Q,
       solves the dual QP via the Dai-Fletcher solver, and forms the new W
       as -(1/lambda) * sum_i x_i * G_i,
    3. scatter the new W back to all ranks and test the gap
       epsilon = min_t J(w_t) - Jt*(alpha_t).

  NOTE(review): df, grad_list and tail_glist are only initialized on rank 0
  (bmrm->local_w has length 0 elsewhere, per VecScatterCreateToZero), so the
  rank-guarded branches must stay consistent; jtwt and reg are broadcast
  from rank 0 before being used by the other ranks.
*/
static PetscErrorCode TaoSolve_BMRM(Tao tao)
{
  PetscErrorCode     ierr;
  TaoConvergedReason reason;
  TAO_DF             df;
  TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;

  /* Values and pointers to parts of the optimization problem */
  PetscReal          f = 0.0;
  Vec                W = tao->solution;
  Vec                G = tao->gradient;
  PetscReal          lambda;
  PetscReal          bt;
  Vec_Chain          grad_list, *tail_glist, *pgrad;
  PetscInt           i;
  PetscMPIInt        rank;

  /* Used in converged criteria check */
  PetscReal          reg;
  PetscReal          jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
  PetscReal          innerSolverTol;
  MPI_Comm           comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  lambda = bmrm->lambda;

  /* Check Stopping Condition */
  tao->step = 1.0;
  max_jtwt = -BMRM_INFTY;
  min_jw = BMRM_INFTY;
  innerSolverTol = 1.0;
  epsilon = 0.0;

  /* the cutting-plane bundle (inner QP workspace + gradient list) lives on rank 0 only */
  if (!rank) {
    ierr = init_df_solver(&df);CHKERRQ(ierr);
    grad_list.next = NULL;
    tail_glist = &grad_list;
  }

  df.tol = 1e-6;
  reason = TAO_CONTINUE_ITERATING;

  /*-----------------Algorithm Begins------------------------*/
  /* make the scatter */
  ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);

  /* NOTE: In application pass the sub-gradient of Remp(W) */
  ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
  ierr = TaoMonitor(tao,tao->niter,f,1.0,0.0,tao->step,&reason);CHKERRQ(ierr);
  while (reason == TAO_CONTINUE_ITERATING) {
    /* compute bt = Remp(Wt-1) - <Wt-1, At> */
    ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
    bt = f - bt;

    /* First gather the gradient to the master node */
    ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);

    /* Bring up the inner solver */
    if (!rank) {
      /* append the new cutting plane (gradient copy + linear offset -bt) to the bundle */
      ierr = ensure_df_space(tao->niter+1, &df);CHKERRQ(ierr);
      ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
      tail_glist->next = pgrad;
      tail_glist = pgrad;

      df.a[tao->niter] = 1.0;
      df.f[tao->niter] = -bt;
      df.u[tao->niter] = 1.0;
      df.l[tao->niter] = 0.0;

      /* set up the Q */
      /* new row/column of the dual Hessian: Q_ij = <G_i, G_j> / lambda */
      pgrad = grad_list.next;
      for (i=0; i<=tao->niter; i++) {
        if (!pgrad) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Assert that there are at least tao->niter+1 pgrad available");
        ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
        df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
        pgrad = pgrad->next;
      }

      if (tao->niter > 0) {
        df.x[tao->niter] = 0.0;
        ierr = solve(&df);CHKERRQ(ierr);
      } else
        df.x[0] = 1.0;   /* single cutting plane: the dual solution is trivially x = 1 */

      /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
      jtwt = 0.0;
      /* w_t = -(1/lambda) * sum_i x_i * G_i, accumulated into local_w */
      ierr = VecSet(bmrm->local_w, 0.0);CHKERRQ(ierr);
      pgrad = grad_list.next;
      for (i=0; i<=tao->niter; i++) {
        jtwt -= df.x[i] * df.f[i];
        ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V);CHKERRQ(ierr);
        pgrad = pgrad->next;
      }

      ierr = VecNorm(bmrm->local_w, NORM_2, &reg);CHKERRQ(ierr);
      reg = 0.5*lambda*reg*reg;   /* regularizer value at the new iterate */
      jtwt -= reg;
    } /* end if rank == 0 */

    /* scatter the new W to all nodes */
    ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

    ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);

    /* jtwt and reg were computed on rank 0 only; share them before use */
    ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
    ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);

    jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
    if (jw < min_jw) min_jw = jw;
    if (jtwt > max_jtwt) max_jtwt = jtwt;

    pre_epsilon = epsilon;
    epsilon = min_jw - jtwt;   /* bundle-method gap; converged when small enough */

    if (!rank) {
      /* anneal the inner QP tolerance down with the outer gap, floored at 1e-7 */
      if (innerSolverTol > epsilon) innerSolverTol = epsilon;
      else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;

      /* if the annealing doesn't work well, lower the inner solver tolerance */
      if(pre_epsilon < epsilon) innerSolverTol *= 0.2;

      df.tol = innerSolverTol*0.5;
    }

    tao->niter++;
    ierr = TaoMonitor(tao,tao->niter,min_jw,epsilon,0.0,tao->step,&reason);CHKERRQ(ierr);
  }

  /* free all the memory */
  if (!rank) {
    ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
    ierr = destroy_df_solver(&df);CHKERRQ(ierr);
  }

  ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
199 
200 
201 /* ---------------------------------------------------------- */
202 
203 #undef __FUNCT__
204 #define __FUNCT__ "TaoSetup_BMRM"
205 static PetscErrorCode TaoSetup_BMRM(Tao tao)
206 {
207 
208   PetscErrorCode ierr;
209 
210   PetscFunctionBegin;
211   /* Allocate some arrays */
212   if (!tao->gradient) {
213     ierr = VecDuplicate(tao->solution, &tao->gradient);CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 /*------------------------------------------------------------*/
219 #undef __FUNCT__
220 #define __FUNCT__ "TaoDestroy_BMRM"
221 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
222 {
223   PetscErrorCode ierr;
224 
225   PetscFunctionBegin;
226   ierr = PetscFree(tao->data);CHKERRQ(ierr);
227   PetscFunctionReturn(0);
228 }
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "TaoSetFromOptions_BMRM"
232 static PetscErrorCode TaoSetFromOptions_BMRM(PetscOptionItems *PetscOptionsObject,Tao tao)
233 {
234   PetscErrorCode ierr;
235   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
236 
237   PetscFunctionBegin;
238   ierr = PetscOptionsHead(PetscOptionsObject,"BMRM for regularized risk minimization");CHKERRQ(ierr);
239   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,NULL);CHKERRQ(ierr);
240   ierr = PetscOptionsTail();CHKERRQ(ierr);
241   PetscFunctionReturn(0);
242 }
243 
244 /*------------------------------------------------------------*/
245 #undef __FUNCT__
246 #define __FUNCT__ "TaoView_BMRM"
247 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
248 {
249   PetscBool      isascii;
250   PetscErrorCode ierr;
251 
252   PetscFunctionBegin;
253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
254   if (isascii) {
255     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
256     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 /*------------------------------------------------------------*/
262 /*MC
263   TAOBMRM - bundle method for regularized risk minimization
264 
265   Options Database Keys:
. -tao_bmrm_lambda - regulariser weight
267 
268   Level: beginner
269 M*/
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "TaoCreate_BMRM"
273 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
274 {
275   TAO_BMRM       *bmrm;
276   PetscErrorCode ierr;
277 
278   PetscFunctionBegin;
279   tao->ops->setup = TaoSetup_BMRM;
280   tao->ops->solve = TaoSolve_BMRM;
281   tao->ops->view  = TaoView_BMRM;
282   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
283   tao->ops->destroy = TaoDestroy_BMRM;
284 
285   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
286   bmrm->lambda = 1.0;
287   tao->data = (void*)bmrm;
288 
289   /* Override default settings (unless already changed) */
290   if (!tao->max_it_changed) tao->max_it = 2000;
291   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
292   if (!tao->gatol_changed) tao->gatol = 1.0e-12;
293   if (!tao->grtol_changed) tao->grtol = 1.0e-12;
294 
295   PetscFunctionReturn(0);
296 }
297 
298 #undef __FUNCT__
299 #define __FUNCT__ "init_df_solver"
300 PetscErrorCode init_df_solver(TAO_DF *df)
301 {
302   PetscInt       i, n = INCRE_DIM;
303   PetscErrorCode ierr;
304 
305   PetscFunctionBegin;
306   /* default values */
307   df->maxProjIter = 200;
308   df->maxPGMIter = 300000;
309   df->b = 1.0;
310 
311   /* memory space required by Dai-Fletcher */
312   df->cur_num_cp = n;
313   ierr = PetscMalloc1(n, &df->f);CHKERRQ(ierr);
314   ierr = PetscMalloc1(n, &df->a);CHKERRQ(ierr);
315   ierr = PetscMalloc1(n, &df->l);CHKERRQ(ierr);
316   ierr = PetscMalloc1(n, &df->u);CHKERRQ(ierr);
317   ierr = PetscMalloc1(n, &df->x);CHKERRQ(ierr);
318   ierr = PetscMalloc1(n, &df->Q);CHKERRQ(ierr);
319 
320   for (i = 0; i < n; i ++) {
321     ierr = PetscMalloc1(n, &df->Q[i]);CHKERRQ(ierr);
322   }
323 
324   ierr = PetscMalloc1(n, &df->g);CHKERRQ(ierr);
325   ierr = PetscMalloc1(n, &df->y);CHKERRQ(ierr);
326   ierr = PetscMalloc1(n, &df->tempv);CHKERRQ(ierr);
327   ierr = PetscMalloc1(n, &df->d);CHKERRQ(ierr);
328   ierr = PetscMalloc1(n, &df->Qd);CHKERRQ(ierr);
329   ierr = PetscMalloc1(n, &df->t);CHKERRQ(ierr);
330   ierr = PetscMalloc1(n, &df->xplus);CHKERRQ(ierr);
331   ierr = PetscMalloc1(n, &df->tplus);CHKERRQ(ierr);
332   ierr = PetscMalloc1(n, &df->sk);CHKERRQ(ierr);
333   ierr = PetscMalloc1(n, &df->yk);CHKERRQ(ierr);
334 
335   ierr = PetscMalloc1(n, &df->ipt);CHKERRQ(ierr);
336   ierr = PetscMalloc1(n, &df->ipt2);CHKERRQ(ierr);
337   ierr = PetscMalloc1(n, &df->uv);CHKERRQ(ierr);
338   PetscFunctionReturn(0);
339 }
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "ensure_df_space"
343 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
344 {
345   PetscErrorCode ierr;
346   PetscReal      *tmp, **tmp_Q;
347   PetscInt       i, n, old_n;
348 
349   PetscFunctionBegin;
350   df->dim = dim;
351   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
352 
353   old_n = df->cur_num_cp;
354   df->cur_num_cp += INCRE_DIM;
355   n = df->cur_num_cp;
356 
357   /* memory space required by dai-fletcher */
358   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
359   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n);CHKERRQ(ierr);
360   ierr = PetscFree(df->f);CHKERRQ(ierr);
361   df->f = tmp;
362 
363   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
364   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n);CHKERRQ(ierr);
365   ierr = PetscFree(df->a);CHKERRQ(ierr);
366   df->a = tmp;
367 
368   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
369   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n);CHKERRQ(ierr);
370   ierr = PetscFree(df->l);CHKERRQ(ierr);
371   df->l = tmp;
372 
373   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
374   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n);CHKERRQ(ierr);
375   ierr = PetscFree(df->u);CHKERRQ(ierr);
376   df->u = tmp;
377 
378   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
379   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n);CHKERRQ(ierr);
380   ierr = PetscFree(df->x);CHKERRQ(ierr);
381   df->x = tmp;
382 
383   ierr = PetscMalloc1(n, &tmp_Q);CHKERRQ(ierr);
384   for (i = 0; i < n; i ++) {
385     ierr = PetscMalloc1(n, &tmp_Q[i]);CHKERRQ(ierr);
386     if (i < old_n) {
387       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n);CHKERRQ(ierr);
388       ierr = PetscFree(df->Q[i]);CHKERRQ(ierr);
389     }
390   }
391 
392   ierr = PetscFree(df->Q);CHKERRQ(ierr);
393   df->Q = tmp_Q;
394 
395   ierr = PetscFree(df->g);CHKERRQ(ierr);
396   ierr = PetscMalloc1(n, &df->g);CHKERRQ(ierr);
397 
398   ierr = PetscFree(df->y);CHKERRQ(ierr);
399   ierr = PetscMalloc1(n, &df->y);CHKERRQ(ierr);
400 
401   ierr = PetscFree(df->tempv);CHKERRQ(ierr);
402   ierr = PetscMalloc1(n, &df->tempv);CHKERRQ(ierr);
403 
404   ierr = PetscFree(df->d);CHKERRQ(ierr);
405   ierr = PetscMalloc1(n, &df->d);CHKERRQ(ierr);
406 
407   ierr = PetscFree(df->Qd);CHKERRQ(ierr);
408   ierr = PetscMalloc1(n, &df->Qd);CHKERRQ(ierr);
409 
410   ierr = PetscFree(df->t);CHKERRQ(ierr);
411   ierr = PetscMalloc1(n, &df->t);CHKERRQ(ierr);
412 
413   ierr = PetscFree(df->xplus);CHKERRQ(ierr);
414   ierr = PetscMalloc1(n, &df->xplus);CHKERRQ(ierr);
415 
416   ierr = PetscFree(df->tplus);CHKERRQ(ierr);
417   ierr = PetscMalloc1(n, &df->tplus);CHKERRQ(ierr);
418 
419   ierr = PetscFree(df->sk);CHKERRQ(ierr);
420   ierr = PetscMalloc1(n, &df->sk);CHKERRQ(ierr);
421 
422   ierr = PetscFree(df->yk);CHKERRQ(ierr);
423   ierr = PetscMalloc1(n, &df->yk);CHKERRQ(ierr);
424 
425   ierr = PetscFree(df->ipt);CHKERRQ(ierr);
426   ierr = PetscMalloc1(n, &df->ipt);CHKERRQ(ierr);
427 
428   ierr = PetscFree(df->ipt2);CHKERRQ(ierr);
429   ierr = PetscMalloc1(n, &df->ipt2);CHKERRQ(ierr);
430 
431   ierr = PetscFree(df->uv);CHKERRQ(ierr);
432   ierr = PetscMalloc1(n, &df->uv);CHKERRQ(ierr);
433   PetscFunctionReturn(0);
434 }
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "destroy_df_solver"
438 PetscErrorCode destroy_df_solver(TAO_DF *df)
439 {
440   PetscErrorCode ierr;
441   PetscInt       i;
442 
443   PetscFunctionBegin;
444   ierr = PetscFree(df->f);CHKERRQ(ierr);
445   ierr = PetscFree(df->a);CHKERRQ(ierr);
446   ierr = PetscFree(df->l);CHKERRQ(ierr);
447   ierr = PetscFree(df->u);CHKERRQ(ierr);
448   ierr = PetscFree(df->x);CHKERRQ(ierr);
449 
450   for (i = 0; i < df->cur_num_cp; i ++) {
451     ierr = PetscFree(df->Q[i]);CHKERRQ(ierr);
452   }
453   ierr = PetscFree(df->Q);CHKERRQ(ierr);
454   ierr = PetscFree(df->ipt);CHKERRQ(ierr);
455   ierr = PetscFree(df->ipt2);CHKERRQ(ierr);
456   ierr = PetscFree(df->uv);CHKERRQ(ierr);
457   ierr = PetscFree(df->g);CHKERRQ(ierr);
458   ierr = PetscFree(df->y);CHKERRQ(ierr);
459   ierr = PetscFree(df->tempv);CHKERRQ(ierr);
460   ierr = PetscFree(df->d);CHKERRQ(ierr);
461   ierr = PetscFree(df->Qd);CHKERRQ(ierr);
462   ierr = PetscFree(df->t);CHKERRQ(ierr);
463   ierr = PetscFree(df->xplus);CHKERRQ(ierr);
464   ierr = PetscFree(df->tplus);CHKERRQ(ierr);
465   ierr = PetscFree(df->sk);CHKERRQ(ierr);
466   ierr = PetscFree(df->yk);CHKERRQ(ierr);
467   PetscFunctionReturn(0);
468 }
469 
470 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
471 #undef __FUNCT__
472 #define __FUNCT__ "phi"
473 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
474 {
475   PetscReal r = 0.0;
476   PetscInt  i;
477 
478   for (i = 0; i < n; i++){
479     x[i] = -c[i] + lambda*a[i];
480     if (x[i] > u[i])     x[i] = u[i];
481     else if(x[i] < l[i]) x[i] = l[i];
482     r += a[i]*x[i];
483   }
484   return r - b;
485 }
486 
/** Modified Dai-Fletcher QP projector solves the problem:
 *
 *      minimise  0.5*x'*x - c'*x
 *      subj to   a'*x = b
 *                l \leq x \leq u
 *
 *  by finding the root of the monotone function r(lambda) = phi(...),
 *  first bracketing the root and then refining it with a safeguarded
 *  secant iteration.
 *
 *  \param c The point to be projected onto feasible set
 *  \param x Output: the projected point
 *  \param lam_ext Output: the final multiplier estimate lambda
 *  \return number of secant iterations performed
 *
 *  NOTE(review): CHKERRQ/SETERRQ inside this PetscInt-returning function
 *  would funnel a PETSc error code back through the iteration-count
 *  return value; callers only accumulate the count, so failures are not
 *  distinguishable from a large count - confirm this is intended.
 */
#undef __FUNCT__
#define __FUNCT__ "project"
PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
{
  PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
  PetscReal      r, rl, ru, s;
  PetscInt       innerIter;
  PetscBool      nonNegativeSlack = PETSC_FALSE;
  PetscErrorCode ierr;

  *lam_ext = 0;
  lambda  = 0;
  dlambda = 0.5;
  innerIter = 1;

  /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
   *
   *  Optimality conditions for \phi:
   *
   *  1. lambda   <= 0
   *  2. r        <= 0
   *  3. r*lambda == 0
   */

  /* Bracketing Phase */
  r = phi(x, n, lambda, a, b, c, l, u);

  /* early exit when lambda = 0 already satisfies the constraint */
  if(nonNegativeSlack) {
    /* inequality constraint, i.e., with \xi >= 0 constraint */
    if (r < TOL_R) return 0;
  } else  {
    /* equality constraint ,i.e., without \xi >= 0 constraint */
    if (fabs(r) < TOL_R) return 0;
  }

  /* expand dlambda geometrically until the root is bracketed in [lambdal, lambdau] */
  if (r < 0.0){
    lambdal = lambda;
    rl      = r;
    lambda  = lambda + dlambda;
    r       = phi(x, n, lambda, a, b, c, l, u);
    while (r < 0.0 && dlambda < BMRM_INFTY)  {
      lambdal = lambda;
      s       = rl/r - 1.0;
      if (s < 0.1) s = 0.1;
      dlambda = dlambda + dlambda/s;
      lambda  = lambda + dlambda;
      rl      = r;
      r       = phi(x, n, lambda, a, b, c, l, u);
    }
    lambdau = lambda;
    ru      = r;
  } else {
    lambdau = lambda;
    ru      = r;
    lambda  = lambda - dlambda;
    r       = phi(x, n, lambda, a, b, c, l, u);
    while (r > 0.0 && dlambda > -BMRM_INFTY) {
      lambdau = lambda;
      s       = ru/r - 1.0;
      if (s < 0.1) s = 0.1;
      dlambda = dlambda + dlambda/s;
      lambda  = lambda - dlambda;
      ru      = r;
      r       = phi(x, n, lambda, a, b, c, l, u);
    }
    lambdal = lambda;
    rl      = r;
  }

  if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");

  /* exact zero at the upper bracket: lambda is already the root */
  if(ru == 0){
    return innerIter;
  }

  /* Secant Phase */
  s       = 1.0 - rl/ru;
  dlambda = dlambda/s;
  lambda  = lambdau - dlambda;
  r       = phi(x, n, lambda, a, b, c, l, u);

  /* safeguarded secant: keep the root bracketed, falling back to damped
     steps when the secant update would leave [lambdal, lambdau] */
  while (fabs(r) > TOL_R
         && dlambda > TOL_LAM * (1.0 + fabs(lambda))
         && innerIter < df->maxProjIter){
    innerIter++;
    if (r > 0.0){
      if (s <= 2.0){
        lambdau = lambda;
        ru      = r;
        s       = 1.0 - rl/ru;
        dlambda = (lambdau - lambdal) / s;
        lambda  = lambdau - dlambda;
      } else {
        s          = ru/r-1.0;
        if (s < 0.1) s = 0.1;
        dlambda    = (lambdau - lambda) / s;
        lambda_new = 0.75*lambdal + 0.25*lambda;
        if (lambda_new < (lambda - dlambda))
          lambda_new = lambda - dlambda;
        lambdau    = lambda;
        ru         = r;
        lambda     = lambda_new;
        s          = (lambdau - lambdal) / (lambdau - lambda);
      }
    } else {
      if (s >= 2.0){
        lambdal = lambda;
        rl      = r;
        s       = 1.0 - rl/ru;
        dlambda = (lambdau - lambdal) / s;
        lambda  = lambdau - dlambda;
      } else {
        s          = rl/r - 1.0;
        if (s < 0.1) s = 0.1;
        dlambda    = (lambda-lambdal) / s;
        lambda_new = 0.75*lambdau + 0.25*lambda;
        if (lambda_new > (lambda + dlambda))
          lambda_new = lambda + dlambda;
        lambdal    = lambda;
        rl         = r;
        lambda     = lambda_new;
        s          = (lambdau - lambdal) / (lambdau-lambda);
      }
    }
    r = phi(x, n, lambda, a, b, c, l, u);
  }

  *lam_ext = lambda;
  if(innerIter >= df->maxProjIter) {
    ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
  }
  return innerIter;
}
628 
629 
#undef __FUNCT__
#define __FUNCT__ "solve"
/*
  solve - Dai-Fletcher projected gradient method for the dual QP

      minimise  0.5*x'*Q*x + f'*x
      subj to   a'*x = b,  l <= x <= u

  of dimension df->dim.  All data and scratch arrays live in df (see
  init_df_solver/ensure_df_space); the iterate df->x is updated in place.

  Implementation notes:
   - project() performs the projection onto {a'*x = b, l <= x <= u}.
   - t caches Q*x throughout, so the gradient is always g = t + f.
   - Products with Q are restricted to entries of d (or y) larger than
     ProdDELTAsv, whichever index set is smaller.
   - An adaptive nonmonotone linesearch (reference value fr over a window
     of L non-improving steps) accepts the full step when possible and
     otherwise retreats to the damped step lamnew.
   - alpha is a Barzilai-Borwein style steplength from the (sk, yk) pair.
   - Termination is by approximate KKT conditions with multiplier kktlam.

  NOTE(review): lscount/projcount are accumulated but never read, and this
  PetscErrorCode function uses bare "return 0" without PetscFunctionBegin/
  PetscFunctionReturn - presumably deliberate for this internal kernel.
*/
PetscErrorCode solve(TAO_DF *df)
{
  PetscErrorCode ierr;
  PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
  PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
  PetscReal      DELTAsv, ProdDELTAsv;
  PetscReal      c, *tempQ;
  PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
  PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
  PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
  PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
  PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;

  /*** variables for the adaptive nonmonotone linesearch ***/
  PetscInt    L, llast;
  PetscReal   fr, fbest, fv, fc, fv0;

  c = BMRM_INFTY;   /* effectively no upper cap in the KKT partitioning below */

  /* thresholds: DELTAsv classifies active/inactive variables; ProdDELTAsv
     controls which entries participate in sparse Q products */
  DELTAsv = EPS_SV;
  if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
  else  ProdDELTAsv = EPS_SV;

  for (i = 0; i < dim; i++)  tempv[i] = -x[i];

  lam_ext = 0.0;

  /* Project the initial solution */
  projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);

  /* Compute gradient
     g = Q*x + f; */

  it = 0;
  for (i = 0; i < dim; i++) {
    if (fabs(x[i]) > ProdDELTAsv) ipt[it++] = i;
  }

  /* t = Q*x, using only the significant entries of x */
  ierr = PetscMemzero(t, dim*sizeof(PetscReal));CHKERRQ(ierr);
  for (i = 0; i < it; i++){
    tempQ = Q[ipt[i]];
    for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
  }
  for (i = 0; i < dim; i++){
    g[i] = t[i] + f[i];
  }


  /* y = -(x_{k} - g_{k}) */
  for (i = 0; i < dim; i++){
    y[i] = g[i] - x[i];
  }

  /* Project x_{k} - g_{k} */
  projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);

  /* y = P(x_{k} - g_{k}) - x_{k} */
  max = ALPHA_MIN;
  for (i = 0; i < dim; i++){
    y[i] = tempv[i] - x[i];
    if (fabs(y[i]) > max) max = fabs(y[i]);
  }

  /* projected-gradient norm already tiny: x is (near-)stationary */
  if (max < tol*1e-3){
    return 0;
  }

  alpha = 1.0 / max;

  /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
  fv0   = 0.0;
  for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);

  /*** adaptive nonmonotone linesearch ***/
  L     = 2;
  fr    = ALPHA_MAX;
  fbest = fv0;
  fc    = fv0;
  llast = 0;
  akold = bkold = 0.0;

  /***      Iterator begins     ***/
  for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {

    /* tempv = -(x_{k} - alpha*g_{k}) */
    for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];

    /* Project x_{k} - alpha*g_{k} */
    projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);


    /* gd = \inner{d_{k}}{g_{k}}
        d = P(x_{k} - alpha*g_{k}) - x_{k}
    */
    gd = 0.0;
    for (i = 0; i < dim; i++){
      d[i] = y[i] - x[i];
      gd  += d[i] * g[i];
    }

    /* Gradient computation  */

    /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */

    it = it2 = 0;
    for (i = 0; i < dim; i++){
      if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
    }
    for (i = 0; i < dim; i++) {
      if (fabs(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
    }

    ierr = PetscMemzero(Qd, dim*sizeof(PetscReal));CHKERRQ(ierr);
    /* compute Qd = Q*d */
    if (it < it2){
      for (i = 0; i < it; i++){
        tempQ = Q[ipt[i]];
        for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
      }
    } else { /* compute Qd = Q*y-t */
      for (i = 0; i < it2; i++){
        tempQ = Q[ipt2[i]];
        for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
      }
      for (j = 0; j < dim; j++) Qd[j] -= t[j];
    }

    /* ak = inner{d_{k}}{d_{k}} */
    ak = 0.0;
    for (i = 0; i < dim; i++) ak += d[i] * d[i];

    bk = 0.0;
    for (i = 0; i < dim; i++) bk += d[i]*Qd[i];

    /* exact minimizer along d when curvature is positive, else full step */
    if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
    else lamnew = 1.0;

    /* fv is computing f(x_{k} + d_{k}) */
    fv = 0.0;
    for (i = 0; i < dim; i++){
      xplus[i] = x[i] + d[i];
      tplus[i] = t[i] + Qd[i];
      fv      += xplus[i] * (0.5*tplus[i] + f[i]);
    }

    /* fr is fref */
    /* full step rejected by the nonmonotone test: retreat to step lamnew */
    if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
      lscount++;
      fv = 0.0;
      for (i = 0; i < dim; i++){
        xplus[i] = x[i] + lamnew*d[i];
        tplus[i] = t[i] + lamnew*Qd[i];
        fv      += xplus[i] * (0.5*tplus[i] + f[i]);
      }
    }

    /* accept the step; keep (sk, yk) for the BB steplength update */
    for (i = 0; i < dim; i++){
      sk[i] = xplus[i] - x[i];
      yk[i] = tplus[i] - t[i];
      x[i]  = xplus[i];
      t[i]  = tplus[i];
      g[i]  = t[i] + f[i];
    }

    /* update the line search control parameters */
    if (fv < fbest){
      fbest = fv;
      fc    = fv;
      llast = 0;
    } else {
      fc = (fc > fv ? fc : fv);
      llast++;
      if (llast == L){
        fr    = fc;
        fc    = fv;
        llast = 0;
      }
    }

    /* Barzilai-Borwein style steplength from <sk,sk> and <sk,yk> */
    ak = bk = 0.0;
    for (i = 0; i < dim; i++){
      ak += sk[i] * sk[i];
      bk += sk[i] * yk[i];
    }

    if (bk <= EPS*ak) alpha = ALPHA_MAX;
    else {
      if (bkold < EPS*akold) alpha = ak/bk;
      else alpha = (akold+ak)/(bkold+bk);

      if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
      else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
    }

    akold = ak;
    bkold = bk;

    /*** stopping criterion based on KKT conditions ***/
    /* at optimal, gradient of lagrangian w.r.t. x is zero */

    bk = 0.0;
    for (i = 0; i < dim; i++) bk +=  x[i] * x[i];

    /* only attempt the (more expensive) KKT check when the last step was
       small relative to ||x|| */
    if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
      it     = 0;
      luv    = 0;
      kktlam = 0.0;
      for (i = 0; i < dim; i++){
        /* x[i] is active hence lagrange multipliers for box constraints
                are zero. The lagrange multiplier for ineq. const. is then
                defined as below
        */
        if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
          ipt[it++] = i;
          kktlam    = kktlam - a[i]*g[i];
        } else  uv[luv++] = i;
      }

      if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
      else {
        /* average multiplier over the free variables, then verify each
           free variable's stationarity against it */
        kktlam = kktlam/it;
        info   = 1;
        for (i = 0; i < it; i++) {
          if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
            info = 0;
            break;
          }
        }
        if (info == 1)  {
          for (i = 0; i < luv; i++)  {
            if (x[uv[i]] <= DELTAsv){
              /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
                     not be zero. So, the gradient without beta is > 0
              */
              if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
                info = 0;
                break;
              }
            } else {
              /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
                     not be zero. So, the gradient without eta is < 0
              */
              if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
                info = 0;
                break;
              }
            }
          }
        }

        if (info == 1) return 0;
      }
    }
  }
  return 0;
}
888 
889 
890