xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision 3e7ff0edd3573be01c8c0fa32db97c3db8fa5c8d)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
18 */
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "make_grad_node"
22 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = PetscNew(p);CHKERRQ(ierr);
28   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
29   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
30   (*p)->next = NULL;
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "destroy_grad_list"
36 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
37 {
38   PetscErrorCode ierr;
39   Vec_Chain      *p = head->next, *q;
40 
41   PetscFunctionBegin;
42   while(p) {
43     q = p->next;
44     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
45     ierr = PetscFree(p);CHKERRQ(ierr);
46     p = q;
47   }
48   head->next = NULL;
49   PetscFunctionReturn(0);
50 }
51 
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "TaoSolve_BMRM"
55 static PetscErrorCode TaoSolve_BMRM(Tao tao)
56 {
57   PetscErrorCode     ierr;
58   TaoConvergedReason reason;
59   TAO_DF             df;
60   TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;
61 
62   /* Values and pointers to parts of the optimization problem */
63   PetscReal          f = 0.0;
64   Vec                W = tao->solution;
65   Vec                G = tao->gradient;
66   PetscReal          lambda;
67   PetscReal          bt;
68   Vec_Chain          grad_list, *tail_glist, *pgrad;
69   PetscInt           iter = 0;
70   PetscInt           i;
71   PetscMPIInt        rank;
72 
73   /* Used in converged criteria check */
74   PetscReal          reg;
75   PetscReal          jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
76   PetscReal          innerSolverTol;
77   MPI_Comm           comm;
78 
79   PetscFunctionBegin;
80   ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
81   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
82   lambda = bmrm->lambda;
83 
84   /* Check Stopping Condition */
85   tao->step = 1.0;
86   max_jtwt = -BMRM_INFTY;
87   min_jw = BMRM_INFTY;
88   innerSolverTol = 1.0;
89   epsilon = 0.0;
90 
91   if (!rank) {
92     ierr = init_df_solver(&df);CHKERRQ(ierr);
93     grad_list.next = NULL;
94     tail_glist = &grad_list;
95   }
96 
97   df.tol = 1e-6;
98   reason = TAO_CONTINUE_ITERATING;
99 
100   /*-----------------Algorithm Begins------------------------*/
101   /* make the scatter */
102   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
103   ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
104   ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);
105 
106   /* NOTE: In application pass the sub-gradient of Remp(W) */
107   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
108   ierr = TaoMonitor(tao,iter,f,1.0,0.0,tao->step,&reason);CHKERRQ(ierr);
109   while (reason == TAO_CONTINUE_ITERATING) {
110     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
111     ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
112     bt = f - bt;
113 
114     /* First gather the gradient to the master node */
115     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
116     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
117 
118     /* Bring up the inner solver */
119     if (!rank) {
120       ierr = ensure_df_space(iter+1, &df); CHKERRQ(ierr);
121       ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
122       tail_glist->next = pgrad;
123       tail_glist = pgrad;
124 
125       df.a[iter] = 1.0;
126       df.f[iter] = -bt;
127       df.u[iter] = 1.0;
128       df.l[iter] = 0.0;
129 
130       /* set up the Q */
131       pgrad = grad_list.next;
132       for (i=0; i<=iter; i++) {
133         ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
134         df.Q[i][iter] = df.Q[iter][i] = reg / lambda;
135         pgrad = pgrad->next;
136       }
137 
138       if (iter > 0) {
139         df.x[iter] = 0.0;
140         ierr = solve(&df); CHKERRQ(ierr);
141       } else
142         df.x[0] = 1.0;
143 
144       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
145       jtwt = 0.0;
146       ierr = VecSet(bmrm->local_w, 0.0); CHKERRQ(ierr);
147       pgrad = grad_list.next;
148       for (i=0; i<=iter; i++) {
149         jtwt -= df.x[i] * df.f[i];
150         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V); CHKERRQ(ierr);
151         pgrad = pgrad->next;
152       }
153 
154       ierr = VecNorm(bmrm->local_w, NORM_2, &reg); CHKERRQ(ierr);
155       reg = 0.5*lambda*reg*reg;
156       jtwt -= reg;
157     } /* end if rank == 0 */
158 
159     /* scatter the new W to all nodes */
160     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
161     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
162 
163     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
164 
165     ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
166     ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
167 
168     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
169     if (jw < min_jw) min_jw = jw;
170     if (jtwt > max_jtwt) max_jtwt = jtwt;
171 
172     pre_epsilon = epsilon;
173     epsilon = min_jw - jtwt;
174 
175     if (!rank) {
176       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
177       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
178 
179       /* if the annealing doesn't work well, lower the inner solver tolerance */
180       if(pre_epsilon < epsilon) innerSolverTol *= 0.2;
181 
182       df.tol = innerSolverTol*0.5;
183     }
184 
185     iter++;
186     ierr = TaoMonitor(tao,iter,min_jw,epsilon,0.0,tao->step,&reason);CHKERRQ(ierr);
187   }
188 
189   /* free all the memory */
190   if (!rank) {
191     ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
192     ierr = destroy_df_solver(&df);CHKERRQ(ierr);
193   }
194 
195   ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
196   ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
197   PetscFunctionReturn(0);
198 }
199 
200 
201 /* ---------------------------------------------------------- */
202 
203 #undef __FUNCT__
204 #define __FUNCT__ "TaoSetup_BMRM"
205 static PetscErrorCode TaoSetup_BMRM(Tao tao)
206 {
207 
208   PetscErrorCode ierr;
209 
210   PetscFunctionBegin;
211   /* Allocate some arrays */
212   if (!tao->gradient) {
213     ierr = VecDuplicate(tao->solution, &tao->gradient);   CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 /*------------------------------------------------------------*/
219 #undef __FUNCT__
220 #define __FUNCT__ "TaoDestroy_BMRM"
221 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
222 {
223   PetscErrorCode ierr;
224 
225   PetscFunctionBegin;
226   ierr = PetscFree(tao->data);CHKERRQ(ierr);
227   PetscFunctionReturn(0);
228 }
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "TaoSetFromOptions_BMRM"
232 static PetscErrorCode TaoSetFromOptions_BMRM(Tao tao)
233 {
234   PetscErrorCode ierr;
235   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
236 
237   PetscFunctionBegin;
238   ierr = PetscOptionsHead("BMRM for regularized risk minimization");CHKERRQ(ierr);
239   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,NULL); CHKERRQ(ierr);
240   ierr = PetscOptionsTail();CHKERRQ(ierr);
241   PetscFunctionReturn(0);
242 }
243 
244 /*------------------------------------------------------------*/
245 #undef __FUNCT__
246 #define __FUNCT__ "TaoView_BMRM"
247 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
248 {
249   PetscBool      isascii;
250   PetscErrorCode ierr;
251 
252   PetscFunctionBegin;
253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
254   if (isascii) {
255     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
256     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 /*------------------------------------------------------------*/
262 /*MC
263   TAOBMRM - bundle method for regularized risk minimization
264 
265   Options Database Keys:
266 . - tao_bmrm_lambda - regulariser weight
267 
268   Level: beginner
269 M*/
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "TaoCreate_BMRM"
273 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
274 {
275   TAO_BMRM       *bmrm;
276   PetscErrorCode ierr;
277 
278   PetscFunctionBegin;
279   tao->ops->setup = TaoSetup_BMRM;
280   tao->ops->solve = TaoSolve_BMRM;
281   tao->ops->view  = TaoView_BMRM;
282   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
283   tao->ops->destroy = TaoDestroy_BMRM;
284 
285   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
286   bmrm->lambda = 1.0;
287   tao->data = (void*)bmrm;
288 
289   /* Note: May need to be tuned! */
290   tao->max_it = 2048;
291   tao->max_funcs = 300000;
292   tao->fatol = 1e-20;
293   tao->frtol = 1e-25;
294   tao->gatol = 1e-25;
295   tao->grtol = 1e-25;
296 
297   PetscFunctionReturn(0);
298 }
299 
300 #undef __FUNCT__
301 #define __FUNCT__ "init_df_solver"
302 PetscErrorCode init_df_solver(TAO_DF *df)
303 {
304   PetscInt       i, n = INCRE_DIM;
305   PetscErrorCode ierr;
306 
307   PetscFunctionBegin;
308   /* default values */
309   df->maxProjIter = 200;
310   df->maxPGMIter = 300000;
311   df->b = 1.0;
312 
313   /* memory space required by Dai-Fletcher */
314   df->cur_num_cp = n;
315   ierr = PetscMalloc1(n, &df->f); CHKERRQ(ierr);
316   ierr = PetscMalloc1(n, &df->a); CHKERRQ(ierr);
317   ierr = PetscMalloc1(n, &df->l); CHKERRQ(ierr);
318   ierr = PetscMalloc1(n, &df->u); CHKERRQ(ierr);
319   ierr = PetscMalloc1(n, &df->x); CHKERRQ(ierr);
320   ierr = PetscMalloc1(n, &df->Q); CHKERRQ(ierr);
321 
322   for (i = 0; i < n; i ++) {
323     ierr = PetscMalloc1(n, &df->Q[i]); CHKERRQ(ierr);
324   }
325 
326   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
327   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
328   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
329   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
330   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
331   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
332   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
333   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
334   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
335   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
336 
337   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
338   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
339   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
340   PetscFunctionReturn(0);
341 }
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "ensure_df_space"
345 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
346 {
347   PetscErrorCode ierr;
348   PetscReal      *tmp, **tmp_Q;
349   PetscInt       i, n, old_n;
350 
351   PetscFunctionBegin;
352   df->dim = dim;
353   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
354 
355   old_n = df->cur_num_cp;
356   df->cur_num_cp += INCRE_DIM;
357   n = df->cur_num_cp;
358 
359   /* memory space required by dai-fletcher */
360   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
361   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
362   ierr = PetscFree(df->f); CHKERRQ(ierr);
363   df->f = tmp;
364 
365   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
366   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
367   ierr = PetscFree(df->a); CHKERRQ(ierr);
368   df->a = tmp;
369 
370   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
371   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
372   ierr = PetscFree(df->l); CHKERRQ(ierr);
373   df->l = tmp;
374 
375   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
376   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
377   ierr = PetscFree(df->u); CHKERRQ(ierr);
378   df->u = tmp;
379 
380   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
381   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
382   ierr = PetscFree(df->x); CHKERRQ(ierr);
383   df->x = tmp;
384 
385   ierr = PetscMalloc1(n, &tmp_Q); CHKERRQ(ierr);
386   for (i = 0; i < n; i ++) {
387     ierr = PetscMalloc1(n, &tmp_Q[i]); CHKERRQ(ierr);
388     if (i < old_n) {
389       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n); CHKERRQ(ierr);
390       ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
391     }
392   }
393 
394   ierr = PetscFree(df->Q); CHKERRQ(ierr);
395   df->Q = tmp_Q;
396 
397   ierr = PetscFree(df->g); CHKERRQ(ierr);
398   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
399 
400   ierr = PetscFree(df->y); CHKERRQ(ierr);
401   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
402 
403   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
404   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
405 
406   ierr = PetscFree(df->d); CHKERRQ(ierr);
407   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
408 
409   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
410   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
411 
412   ierr = PetscFree(df->t); CHKERRQ(ierr);
413   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
414 
415   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
416   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
417 
418   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
419   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
420 
421   ierr = PetscFree(df->sk); CHKERRQ(ierr);
422   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
423 
424   ierr = PetscFree(df->yk); CHKERRQ(ierr);
425   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
426 
427   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
428   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
429 
430   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
431   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
432 
433   ierr = PetscFree(df->uv); CHKERRQ(ierr);
434   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNCT__
439 #define __FUNCT__ "destroy_df_solver"
440 PetscErrorCode destroy_df_solver(TAO_DF *df)
441 {
442   PetscErrorCode ierr;
443   PetscInt       i;
444 
445   PetscFunctionBegin;
446   ierr = PetscFree(df->f); CHKERRQ(ierr);
447   ierr = PetscFree(df->a); CHKERRQ(ierr);
448   ierr = PetscFree(df->l); CHKERRQ(ierr);
449   ierr = PetscFree(df->u); CHKERRQ(ierr);
450   ierr = PetscFree(df->x); CHKERRQ(ierr);
451 
452   for (i = 0; i < df->cur_num_cp; i ++) {
453     ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
454   }
455   ierr = PetscFree(df->Q); CHKERRQ(ierr);
456   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
457   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
458   ierr = PetscFree(df->uv); CHKERRQ(ierr);
459   ierr = PetscFree(df->g); CHKERRQ(ierr);
460   ierr = PetscFree(df->y); CHKERRQ(ierr);
461   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
462   ierr = PetscFree(df->d); CHKERRQ(ierr);
463   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
464   ierr = PetscFree(df->t); CHKERRQ(ierr);
465   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
466   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
467   ierr = PetscFree(df->sk); CHKERRQ(ierr);
468   ierr = PetscFree(df->yk); CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
473 #undef __FUNCT__
474 #define __FUNCT__ "phi"
475 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
476 {
477   PetscReal r = 0.0;
478   PetscInt  i;
479 
480   for (i = 0; i < n; i++){
481     x[i] = -c[i] + lambda*a[i];
482     if (x[i] > u[i])     x[i] = u[i];
483     else if(x[i] < l[i]) x[i] = l[i];
484     r += a[i]*x[i];
485   }
486   return r - b;
487 }
488 
489 /** Modified Dai-Fletcher QP projector solves the problem:
490  *
491  *      minimise  0.5*x'*x - c'*x
492  *      subj to   a'*x = b
493  *                l \leq x \leq u
494  *
495  *  \param c The point to be projected onto feasible set
496  */
497 #undef __FUNCT__
498 #define __FUNCT__ "project"
499 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
500 {
501   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
502   PetscReal      r, rl, ru, s;
503   PetscInt       innerIter;
504   PetscBool      nonNegativeSlack = PETSC_FALSE;
505   PetscErrorCode ierr;
506 
507   *lam_ext = 0;
508   lambda  = 0;
509   dlambda = 0.5;
510   innerIter = 1;
511 
512   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
513    *
514    *  Optimality conditions for \phi:
515    *
516    *  1. lambda   <= 0
517    *  2. r        <= 0
518    *  3. r*lambda == 0
519    */
520 
521   /* Bracketing Phase */
522   r = phi(x, n, lambda, a, b, c, l, u);
523 
524   if(nonNegativeSlack) {
525     /* inequality constraint, i.e., with \xi >= 0 constraint */
526     if (r < TOL_R) return 0;
527   } else  {
528     /* equality constraint ,i.e., without \xi >= 0 constraint */
529     if (fabs(r) < TOL_R) return 0;
530   }
531 
532   if (r < 0.0){
533     lambdal = lambda;
534     rl      = r;
535     lambda  = lambda + dlambda;
536     r       = phi(x, n, lambda, a, b, c, l, u);
537     while (r < 0.0 && dlambda < BMRM_INFTY)  {
538       lambdal = lambda;
539       s       = rl/r - 1.0;
540       if (s < 0.1) s = 0.1;
541       dlambda = dlambda + dlambda/s;
542       lambda  = lambda + dlambda;
543       rl      = r;
544       r       = phi(x, n, lambda, a, b, c, l, u);
545     }
546     lambdau = lambda;
547     ru      = r;
548   } else {
549     lambdau = lambda;
550     ru      = r;
551     lambda  = lambda - dlambda;
552     r       = phi(x, n, lambda, a, b, c, l, u);
553     while (r > 0.0 && dlambda > -BMRM_INFTY) {
554       lambdau = lambda;
555       s       = ru/r - 1.0;
556       if (s < 0.1) s = 0.1;
557       dlambda = dlambda + dlambda/s;
558       lambda  = lambda - dlambda;
559       ru      = r;
560       r       = phi(x, n, lambda, a, b, c, l, u);
561     }
562     lambdal = lambda;
563     rl      = r;
564   }
565 
566   if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
567 
568   if(ru == 0){
569     lambda = lambdau;
570     r = phi(x, n, lambda, a, b, c, l, u);
571     return innerIter;
572   }
573 
574   /* Secant Phase */
575   s       = 1.0 - rl/ru;
576   dlambda = dlambda/s;
577   lambda  = lambdau - dlambda;
578   r       = phi(x, n, lambda, a, b, c, l, u);
579 
580   while (fabs(r) > TOL_R
581          && dlambda > TOL_LAM * (1.0 + fabs(lambda))
582          && innerIter < df->maxProjIter){
583     innerIter++;
584     if (r > 0.0){
585       if (s <= 2.0){
586         lambdau = lambda;
587         ru      = r;
588         s       = 1.0 - rl/ru;
589         dlambda = (lambdau - lambdal) / s;
590         lambda  = lambdau - dlambda;
591       } else {
592         s          = ru/r-1.0;
593         if (s < 0.1) s = 0.1;
594         dlambda    = (lambdau - lambda) / s;
595         lambda_new = 0.75*lambdal + 0.25*lambda;
596         if (lambda_new < (lambda - dlambda))
597           lambda_new = lambda - dlambda;
598         lambdau    = lambda;
599         ru         = r;
600         lambda     = lambda_new;
601         s          = (lambdau - lambdal) / (lambdau - lambda);
602       }
603     } else {
604       if (s >= 2.0){
605         lambdal = lambda;
606         rl      = r;
607         s       = 1.0 - rl/ru;
608         dlambda = (lambdau - lambdal) / s;
609         lambda  = lambdau - dlambda;
610       } else {
611         s          = rl/r - 1.0;
612         if (s < 0.1) s = 0.1;
613         dlambda    = (lambda-lambdal) / s;
614         lambda_new = 0.75*lambdau + 0.25*lambda;
615         if (lambda_new > (lambda + dlambda))
616           lambda_new = lambda + dlambda;
617         lambdal    = lambda;
618         rl         = r;
619         lambda     = lambda_new;
620         s          = (lambdau - lambdal) / (lambdau-lambda);
621       }
622     }
623     r = phi(x, n, lambda, a, b, c, l, u);
624   }
625 
626   *lam_ext = lambda;
627   if(innerIter >= df->maxProjIter) {
628     ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
629   }
630   return innerIter;
631 }
632 
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "solve"
636 PetscErrorCode solve(TAO_DF *df)
637 {
638   PetscErrorCode ierr;
639   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
640   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
641   PetscReal      DELTAsv, ProdDELTAsv;
642   PetscReal      c, *tempQ;
643   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
644   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
645   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
646   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
647   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
648 
649   /*** variables for the adaptive nonmonotone linesearch ***/
650   PetscInt    L, llast;
651   PetscReal   fr, fbest, fv, fc, fv0;
652 
653   c = BMRM_INFTY;
654 
655   DELTAsv = EPS_SV;
656   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
657   else  ProdDELTAsv = EPS_SV;
658 
659   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
660 
661   lam_ext = 0.0;
662 
663   /* Project the initial solution */
664   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
665 
666   /* Compute gradient
667      g = Q*x + f; */
668 
669   it = 0;
670   for (i = 0; i < dim; i++) {
671     if (fabs(x[i]) > ProdDELTAsv) ipt[it++] = i;
672   }
673 
674   ierr = PetscMemzero(t, dim*sizeof(PetscReal)); CHKERRQ(ierr);
675   for (i = 0; i < it; i++){
676     tempQ = Q[ipt[i]];
677     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
678   }
679   for (i = 0; i < dim; i++){
680     g[i] = t[i] + f[i];
681   }
682 
683 
684   /* y = -(x_{k} - g_{k}) */
685   for (i = 0; i < dim; i++){
686     y[i] = g[i] - x[i];
687   }
688 
689   /* Project x_{k} - g_{k} */
690   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
691 
692   /* y = P(x_{k} - g_{k}) - x_{k} */
693   max = ALPHA_MIN;
694   for (i = 0; i < dim; i++){
695     y[i] = tempv[i] - x[i];
696     if (fabs(y[i]) > max) max = fabs(y[i]);
697   }
698 
699   if (max < tol*1e-3){
700     lscount = 0;
701     innerIter    = 0;
702     return 0;
703   }
704 
705   alpha = 1.0 / max;
706 
707   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
708   fv0   = 0.0;
709   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
710 
711   /*** adaptive nonmonotone linesearch ***/
712   L     = 2;
713   fr    = ALPHA_MAX;
714   fbest = fv0;
715   fc    = fv0;
716   llast = 0;
717   akold = bkold = 0.0;
718 
719   /***      Iterator begins     ***/
720   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
721 
722     /* tempv = -(x_{k} - alpha*g_{k}) */
723     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
724 
725     /* Project x_{k} - alpha*g_{k} */
726     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
727 
728 
729     /* gd = \inner{d_{k}}{g_{k}}
730         d = P(x_{k} - alpha*g_{k}) - x_{k}
731     */
732     gd = 0.0;
733     for (i = 0; i < dim; i++){
734       d[i] = y[i] - x[i];
735       gd  += d[i] * g[i];
736     }
737 
738     /* Gradient computation  */
739 
740     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
741 
742     it = it2 = 0;
743     for (i = 0; i < dim; i++){
744       if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
745     }
746     for (i = 0; i < dim; i++) {
747       if (fabs(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
748     }
749 
750     ierr = PetscMemzero(Qd, dim*sizeof(PetscReal)); CHKERRQ(ierr);
751     /* compute Qd = Q*d */
752     if (it < it2){
753       for (i = 0; i < it; i++){
754         tempQ = Q[ipt[i]];
755         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
756       }
757     } else { /* compute Qd = Q*y-t */
758       for (i = 0; i < it2; i++){
759         tempQ = Q[ipt2[i]];
760         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
761       }
762       for (j = 0; j < dim; j++) Qd[j] -= t[j];
763     }
764 
765     /* ak = inner{d_{k}}{d_{k}} */
766     ak = 0.0;
767     for (i = 0; i < dim; i++) ak += d[i] * d[i];
768 
769     bk = 0.0;
770     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
771 
772     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
773     else lamnew = 1.0;
774 
775     /* fv is computing f(x_{k} + d_{k}) */
776     fv = 0.0;
777     for (i = 0; i < dim; i++){
778       xplus[i] = x[i] + d[i];
779       tplus[i] = t[i] + Qd[i];
780       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
781     }
782 
783     /* fr is fref */
784     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
785       lscount++;
786       fv = 0.0;
787       for (i = 0; i < dim; i++){
788         xplus[i] = x[i] + lamnew*d[i];
789         tplus[i] = t[i] + lamnew*Qd[i];
790         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
791       }
792     }
793 
794     for (i = 0; i < dim; i++){
795       sk[i] = xplus[i] - x[i];
796       yk[i] = tplus[i] - t[i];
797       x[i]  = xplus[i];
798       t[i]  = tplus[i];
799       g[i]  = t[i] + f[i];
800     }
801 
802     /* update the line search control parameters */
803     if (fv < fbest){
804       fbest = fv;
805       fc    = fv;
806       llast = 0;
807     } else {
808       fc = (fc > fv ? fc : fv);
809       llast++;
810       if (llast == L){
811         fr    = fc;
812         fc    = fv;
813         llast = 0;
814       }
815     }
816 
817     ak = bk = 0.0;
818     for (i = 0; i < dim; i++){
819       ak += sk[i] * sk[i];
820       bk += sk[i] * yk[i];
821     }
822 
823     if (bk <= EPS*ak) alpha = ALPHA_MAX;
824     else {
825       if (bkold < EPS*akold) alpha = ak/bk;
826       else alpha = (akold+ak)/(bkold+bk);
827 
828       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
829       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
830     }
831 
832     akold = ak;
833     bkold = bk;
834 
835     /*** stopping criterion based on KKT conditions ***/
836     /* at optimal, gradient of lagrangian w.r.t. x is zero */
837 
838     bk = 0.0;
839     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
840 
841     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
842       it     = 0;
843       luv    = 0;
844       kktlam = 0.0;
845       for (i = 0; i < dim; i++){
846         /* x[i] is active hence lagrange multipliers for box constraints
847                 are zero. The lagrange multiplier for ineq. const. is then
848                 defined as below
849         */
850         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
851           ipt[it++] = i;
852           kktlam    = kktlam - a[i]*g[i];
853         } else  uv[luv++] = i;
854       }
855 
856       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
857       else {
858         kktlam = kktlam/it;
859         info   = 1;
860         for (i = 0; i < it; i++) {
861           if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
862             info = 0;
863             break;
864           }
865         }
866         if (info == 1)  {
867           for (i = 0; i < luv; i++)  {
868             if (x[uv[i]] <= DELTAsv){
869               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
870                      not be zero. So, the gradient without beta is > 0
871               */
872               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
873                 info = 0;
874                 break;
875               }
876             } else {
877               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
878                      not be zero. So, the gradient without eta is < 0
879               */
880               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
881                 info = 0;
882                 break;
883               }
884             }
885           }
886         }
887 
888         if (info == 1) return 0;
889       }
890     }
891   }
892   return 0;
893 }
894 
895 
896