xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision cf1aed2ce99d23e50336629af3ca8cf096900abb)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W
18 */
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "make_grad_node"
22 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = PetscNew(p);CHKERRQ(ierr);
28   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
29   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
30   (*p)->next = NULL;
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "destroy_grad_list"
36 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
37 {
38   PetscErrorCode ierr;
39   Vec_Chain      *p = head->next, *q;
40 
41   PetscFunctionBegin;
42   while(p) {
43     q = p->next;
44     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
45     ierr = PetscFree(p);CHKERRQ(ierr);
46     p = q;
47   }
48   head->next = NULL;
49   PetscFunctionReturn(0);
50 }
51 
52 
#undef __FUNCT__
#define __FUNCT__ "TaoSolve_BMRM"
/*
  Bundle method for regularized risk minimization.

  Minimizes J(w) = lambda*0.5*w'w + Remp(w), where the application callback
  returns f = Remp(w) and G = a subgradient of Remp(w).  Each iteration
  gathers the new subgradient on rank 0, appends a cutting plane
  (gradient, offset bt) to the bundle kept there, solves the dual QP with
  the Dai-Fletcher solver, and scatters the updated w back to all ranks.
  Convergence is tracked via the gap epsilon = min_t J(w_t) - Jt*(alpha_t).
*/
static PetscErrorCode TaoSolve_BMRM(Tao tao)
{
  PetscErrorCode     ierr;
  TaoConvergedReason reason;
  TAO_DF             df;          /* Dai-Fletcher dual-QP workspace; populated on rank 0 only */
  TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;

  /* Values and pointers to parts of the optimization problem */
  PetscReal          f = 0.0;
  Vec                W = tao->solution;
  Vec                G = tao->gradient;
  PetscReal          lambda;
  PetscReal          bt;          /* cutting-plane offset: Remp(w) - <w, subgradient> */
  Vec_Chain          grad_list, *tail_glist, *pgrad;  /* chain of gathered subgradients (rank 0) */
  PetscInt           iter = 0;
  PetscInt           i;
  PetscMPIInt        rank;

  /* Used in converged criteria check */
  PetscReal          reg;
  PetscReal          jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
  PetscReal          innerSolverTol;
  MPI_Comm           comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  lambda = bmrm->lambda;

  /* Check Stopping Condition */
  tao->step = 1.0;
  max_jtwt = -BMRM_INFTY;
  min_jw = BMRM_INFTY;
  innerSolverTol = 1.0;
  epsilon = 0.0;

  if (!rank) {
    ierr = init_df_solver(&df);CHKERRQ(ierr);
    grad_list.next = NULL;
    tail_glist = &grad_list;
  }

  df.tol = 1e-6;
  reason = TAO_CONTINUE_ITERATING;

  /*-----------------Algorithm Begins------------------------*/
  /* make the scatter */
  /* local_w is a sequential vector living on rank 0; it doubles as the
     gather target for subgradients and the accumulator for the new w */
  ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);

  /* NOTE: In application pass the sub-gradient of Remp(W) */
  ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
  ierr = TaoMonitor(tao,iter,f,1.0,0.0,tao->step,&reason);CHKERRQ(ierr);
  while (reason == TAO_CONTINUE_ITERATING) {
    /* compute bt = Remp(Wt-1) - <Wt-1, At> */
    ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
    bt = f - bt;

    /* First gather the gradient to the master node */
    ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);

    /* Bring up the inner solver */
    if (!rank) {
      ierr = ensure_df_space(iter+1, &df); CHKERRQ(ierr);
      ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
      tail_glist->next = pgrad;
      tail_glist = pgrad;

      /* new cutting plane: unit simplex weight bounds [0,1], linear term -bt */
      df.a[iter] = 1.0;
      df.f[iter] = -bt;
      df.u[iter] = 1.0;
      df.l[iter] = 0.0;

      /* set up the Q */
      /* extend the Gram matrix with inner products of all stored
         subgradients against the newest one, scaled by 1/lambda */
      pgrad = grad_list.next;
      for (i=0; i<=iter; i++) {
        ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
        df.Q[i][iter] = df.Q[iter][i] = reg / lambda;
        pgrad = pgrad->next;
      }

      if (iter > 0) {
        df.x[iter] = 0.0;
        ierr = solve(&df); CHKERRQ(ierr);
      } else
        df.x[0] = 1.0;

      /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
      /* also rebuild w = -(1/lambda) * sum_i alpha_i * g_i in local_w */
      jtwt = 0.0;
      ierr = VecSet(bmrm->local_w, 0.0); CHKERRQ(ierr);
      pgrad = grad_list.next;
      for (i=0; i<=iter; i++) {
        jtwt -= df.x[i] * df.f[i];
        ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V); CHKERRQ(ierr);
        pgrad = pgrad->next;
      }

      ierr = VecNorm(bmrm->local_w, NORM_2, &reg); CHKERRQ(ierr);
      reg = 0.5*lambda*reg*reg;
      jtwt -= reg;
    } /* end if rank == 0 */

    /* scatter the new W to all nodes */
    ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

    ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);

    /* rank 0 computed jtwt (dual value) and reg (regularizer term); share them */
    ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
    ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);

    jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
    if (jw < min_jw) min_jw = jw;
    if (jtwt > max_jtwt) max_jtwt = jtwt;

    pre_epsilon = epsilon;
    epsilon = min_jw - jtwt;

    if (!rank) {
      /* anneal the inner-solver tolerance toward the current gap, floored at 1e-7 */
      if (innerSolverTol > epsilon) innerSolverTol = epsilon;
      else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;

      /* if the annealing doesn't work well, lower the inner solver tolerance */
      if(pre_epsilon < epsilon) innerSolverTol *= 0.2;

      df.tol = innerSolverTol*0.5;
    }

    iter++;
    ierr = TaoMonitor(tao,iter,min_jw,epsilon,0.0,tao->step,&reason);CHKERRQ(ierr);
  }

  /* free all the memory */
  if (!rank) {
    ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
    ierr = destroy_df_solver(&df);CHKERRQ(ierr);
  }

  ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
199 
200 
201 /* ---------------------------------------------------------- */
202 
203 #undef __FUNCT__
204 #define __FUNCT__ "TaoSetup_BMRM"
205 static PetscErrorCode TaoSetup_BMRM(Tao tao)
206 {
207 
208   PetscErrorCode ierr;
209 
210   PetscFunctionBegin;
211   /* Allocate some arrays */
212   if (!tao->gradient) {
213     ierr = VecDuplicate(tao->solution, &tao->gradient);   CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 /*------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "TaoDestroy_BMRM"
/* Free the BMRM context; the scatter and local_w are released at the end of
   TaoSolve_BMRM, so only tao->data remains to be freed here. */
static PetscErrorCode TaoDestroy_BMRM(Tao tao)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree(tao->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "TaoSetFromOptions_BMRM"
232 static PetscErrorCode TaoSetFromOptions_BMRM(Tao tao)
233 {
234   PetscErrorCode ierr;
235   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
236   PetscBool      flg;
237 
238   PetscFunctionBegin;
239   ierr = PetscOptionsHead("BMRM for regularized risk minimization");CHKERRQ(ierr);
240   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,&flg); CHKERRQ(ierr);
241   ierr = PetscOptionsTail();CHKERRQ(ierr);
242   PetscFunctionReturn(0);
243 }
244 
245 /*------------------------------------------------------------*/
246 #undef __FUNCT__
247 #define __FUNCT__ "TaoView_BMRM"
248 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
249 {
250   PetscBool      isascii;
251   PetscErrorCode ierr;
252 
253   PetscFunctionBegin;
254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
255   if (isascii) {
256     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
257     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 /*------------------------------------------------------------*/
263 /*MC
264   TAOBMRM - bundle method for regularized risk minimization
265 
266   Options Database Keys:
267 . -tao_bmrm_lambda - regulariser weight
268 
269   Level: beginner
270 M*/
271 
272 EXTERN_C_BEGIN
273 #undef __FUNCT__
274 #define __FUNCT__ "TaoCreate_BMRM"
275 PetscErrorCode TaoCreate_BMRM(Tao tao)
276 {
277   TAO_BMRM       *bmrm;
278   PetscErrorCode ierr;
279 
280   PetscFunctionBegin;
281   tao->ops->setup = TaoSetup_BMRM;
282   tao->ops->solve = TaoSolve_BMRM;
283   tao->ops->view  = TaoView_BMRM;
284   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
285   tao->ops->destroy = TaoDestroy_BMRM;
286 
287   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
288   bmrm->lambda = 1.0;
289   tao->data = (void*)bmrm;
290 
291   /* Note: May need to be tuned! */
292   tao->max_it = 2048;
293   tao->max_funcs = 300000;
294   tao->fatol = 1e-20;
295   tao->frtol = 1e-25;
296   tao->gatol = 1e-25;
297   tao->grtol = 1e-25;
298 
299   PetscFunctionReturn(0);
300 }
301 EXTERN_C_END
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "init_df_solver"
305 PetscErrorCode init_df_solver(TAO_DF *df)
306 {
307   PetscInt       i, n = INCRE_DIM;
308   PetscErrorCode ierr;
309 
310   PetscFunctionBegin;
311   /* default values */
312   df->maxProjIter = 200;
313   df->maxPGMIter = 300000;
314   df->b = 1.0;
315 
316   /* memory space required by Dai-Fletcher */
317   df->cur_num_cp = n;
318   ierr = PetscMalloc1(n, &df->f); CHKERRQ(ierr);
319   ierr = PetscMalloc1(n, &df->a); CHKERRQ(ierr);
320   ierr = PetscMalloc1(n, &df->l); CHKERRQ(ierr);
321   ierr = PetscMalloc1(n, &df->u); CHKERRQ(ierr);
322   ierr = PetscMalloc1(n, &df->x); CHKERRQ(ierr);
323   ierr = PetscMalloc1(n, &df->Q); CHKERRQ(ierr);
324 
325   for (i = 0; i < n; i ++) {
326     ierr = PetscMalloc1(n, &df->Q[i]); CHKERRQ(ierr);
327   }
328 
329   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
330   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
331   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
332   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
333   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
334   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
335   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
336   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
337   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
338   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
339 
340   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
341   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
342   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
343   PetscFunctionReturn(0);
344 }
345 
346 #undef __FUNCT__
347 #define __FUNCT__ "ensure_df_space"
348 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
349 {
350   PetscErrorCode ierr;
351   PetscReal      *tmp, **tmp_Q;
352   PetscInt       i, n, old_n;
353 
354   PetscFunctionBegin;
355   df->dim = dim;
356   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
357 
358   old_n = df->cur_num_cp;
359   df->cur_num_cp += INCRE_DIM;
360   n = df->cur_num_cp;
361 
362   /* memory space required by dai-fletcher */
363   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
364   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
365   ierr = PetscFree(df->f); CHKERRQ(ierr);
366   df->f = tmp;
367 
368   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
369   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
370   ierr = PetscFree(df->a); CHKERRQ(ierr);
371   df->a = tmp;
372 
373   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
374   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
375   ierr = PetscFree(df->l); CHKERRQ(ierr);
376   df->l = tmp;
377 
378   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
379   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
380   ierr = PetscFree(df->u); CHKERRQ(ierr);
381   df->u = tmp;
382 
383   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
384   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
385   ierr = PetscFree(df->x); CHKERRQ(ierr);
386   df->x = tmp;
387 
388   ierr = PetscMalloc1(n, &tmp_Q); CHKERRQ(ierr);
389   for (i = 0; i < n; i ++) {
390     ierr = PetscMalloc1(n, &tmp_Q[i]); CHKERRQ(ierr);
391     if (i < old_n) {
392       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n); CHKERRQ(ierr);
393       ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
394     }
395   }
396 
397   ierr = PetscFree(df->Q); CHKERRQ(ierr);
398   df->Q = tmp_Q;
399 
400   ierr = PetscFree(df->g); CHKERRQ(ierr);
401   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
402 
403   ierr = PetscFree(df->y); CHKERRQ(ierr);
404   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
405 
406   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
407   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
408 
409   ierr = PetscFree(df->d); CHKERRQ(ierr);
410   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
411 
412   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
413   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
414 
415   ierr = PetscFree(df->t); CHKERRQ(ierr);
416   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
417 
418   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
419   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
420 
421   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
422   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
423 
424   ierr = PetscFree(df->sk); CHKERRQ(ierr);
425   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
426 
427   ierr = PetscFree(df->yk); CHKERRQ(ierr);
428   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
429 
430   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
431   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
432 
433   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
434   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
435 
436   ierr = PetscFree(df->uv); CHKERRQ(ierr);
437   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "destroy_df_solver"
443 PetscErrorCode destroy_df_solver(TAO_DF *df)
444 {
445   PetscErrorCode ierr;
446   PetscInt       i;
447 
448   PetscFunctionBegin;
449   ierr = PetscFree(df->f); CHKERRQ(ierr);
450   ierr = PetscFree(df->a); CHKERRQ(ierr);
451   ierr = PetscFree(df->l); CHKERRQ(ierr);
452   ierr = PetscFree(df->u); CHKERRQ(ierr);
453   ierr = PetscFree(df->x); CHKERRQ(ierr);
454 
455   for (i = 0; i < df->cur_num_cp; i ++) {
456     ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
457   }
458   ierr = PetscFree(df->Q); CHKERRQ(ierr);
459   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
460   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
461   ierr = PetscFree(df->uv); CHKERRQ(ierr);
462   ierr = PetscFree(df->g); CHKERRQ(ierr);
463   ierr = PetscFree(df->y); CHKERRQ(ierr);
464   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
465   ierr = PetscFree(df->d); CHKERRQ(ierr);
466   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
467   ierr = PetscFree(df->t); CHKERRQ(ierr);
468   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
469   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
470   ierr = PetscFree(df->sk); CHKERRQ(ierr);
471   ierr = PetscFree(df->yk); CHKERRQ(ierr);
472   PetscFunctionReturn(0);
473 }
474 
475 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
476 #undef __FUNCT__
477 #define __FUNCT__ "phi"
478 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
479 {
480   PetscReal r = 0.0;
481   PetscInt  i;
482 
483   for (i = 0; i < n; i++){
484     x[i] = -c[i] + lambda*a[i];
485     if (x[i] > u[i])     x[i] = u[i];
486     else if(x[i] < l[i]) x[i] = l[i];
487     r += a[i]*x[i];
488   }
489   return r - b;
490 }
491 
492 /** Modified Dai-Fletcher QP projector solves the problem:
493  *
494  *      minimise  0.5*x'*x - c'*x
495  *      subj to   a'*x = b
496  *                l \leq x \leq u
497  *
498  *  \param c The point to be projected onto feasible set
499  */
#undef __FUNCT__
#define __FUNCT__ "project"
/*
  Find the multiplier lambda (root of r(lambda) = a'*x(lambda) - b, with
  x(lambda) computed by phi()) via a bracketing phase followed by a
  safeguarded secant phase.  Stores the final multiplier in *lam_ext and
  returns the number of secant iterations used (0 when the initial point
  already satisfies the constraint).
*/
PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
{
  PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
  PetscReal      r, rl, ru, s;
  PetscInt       innerIter;
  PetscBool      nonNegativeSlack = PETSC_FALSE;   /* compile-time switch between ineq./eq. stopping tests */
  PetscErrorCode ierr;

  *lam_ext = 0;
  lambda  = 0;
  dlambda = 0.5;
  innerIter = 1;

  /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
   *
   *  Optimality conditions for \phi:
   *
   *  1. lambda   <= 0
   *  2. r        <= 0
   *  3. r*lambda == 0
   */

  /* Bracketing Phase */
  r = phi(x, n, lambda, a, b, c, l, u);

  if(nonNegativeSlack) {
    /* inequality constraint, i.e., with \xi >= 0 constraint */
    if (r < TOL_R) return 0;
  } else  {
    /* equality constraint ,i.e., without \xi >= 0 constraint */
    if (fabs(r) < TOL_R) return 0;
  }

  if (r < 0.0){
    /* residual negative: grow lambda with an expanding step until r changes sign */
    lambdal = lambda;
    rl      = r;
    lambda  = lambda + dlambda;
    r       = phi(x, n, lambda, a, b, c, l, u);
    while (r < 0.0 && dlambda < BMRM_INFTY)  {
      lambdal = lambda;
      s       = rl/r - 1.0;
      if (s < 0.1) s = 0.1;
      dlambda = dlambda + dlambda/s;
      lambda  = lambda + dlambda;
      rl      = r;
      r       = phi(x, n, lambda, a, b, c, l, u);
    }
    lambdau = lambda;
    ru      = r;
  } else {
    /* residual positive: shrink lambda with an expanding step until r changes sign */
    lambdau = lambda;
    ru      = r;
    lambda  = lambda - dlambda;
    r       = phi(x, n, lambda, a, b, c, l, u);
    while (r > 0.0 && dlambda > -BMRM_INFTY) {
      lambdau = lambda;
      s       = ru/r - 1.0;
      if (s < 0.1) s = 0.1;
      dlambda = dlambda + dlambda/s;
      lambda  = lambda - dlambda;
      ru      = r;
      r       = phi(x, n, lambda, a, b, c, l, u);
    }
    lambdal = lambda;
    rl      = r;
  }

  if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");

  if(ru == 0){
    /* the upper bracket hit the root exactly */
    lambda = lambdau;
    r = phi(x, n, lambda, a, b, c, l, u);
    return innerIter;
  }

  /* Secant Phase */
  s       = 1.0 - rl/ru;
  dlambda = dlambda/s;
  lambda  = lambdau - dlambda;
  r       = phi(x, n, lambda, a, b, c, l, u);

  while (fabs(r) > TOL_R
         && dlambda > TOL_LAM * (1.0 + fabs(lambda))
         && innerIter < df->maxProjIter){
    innerIter++;
    if (r > 0.0){
      if (s <= 2.0){
        /* standard secant update of the upper bracket */
        lambdau = lambda;
        ru      = r;
        s       = 1.0 - rl/ru;
        dlambda = (lambdau - lambdal) / s;
        lambda  = lambdau - dlambda;
      } else {
        /* safeguarded step: blend toward lambdal so the bracket keeps shrinking */
        s          = ru/r-1.0;
        if (s < 0.1) s = 0.1;
        dlambda    = (lambdau - lambda) / s;
        lambda_new = 0.75*lambdal + 0.25*lambda;
        if (lambda_new < (lambda - dlambda))
          lambda_new = lambda - dlambda;
        lambdau    = lambda;
        ru         = r;
        lambda     = lambda_new;
        s          = (lambdau - lambdal) / (lambdau - lambda);
      }
    } else {
      if (s >= 2.0){
        /* standard secant update of the lower bracket */
        lambdal = lambda;
        rl      = r;
        s       = 1.0 - rl/ru;
        dlambda = (lambdau - lambdal) / s;
        lambda  = lambdau - dlambda;
      } else {
        /* safeguarded step: blend toward lambdau so the bracket keeps shrinking */
        s          = rl/r - 1.0;
        if (s < 0.1) s = 0.1;
        dlambda    = (lambda-lambdal) / s;
        lambda_new = 0.75*lambdau + 0.25*lambda;
        if (lambda_new > (lambda + dlambda))
          lambda_new = lambda + dlambda;
        lambdal    = lambda;
        rl         = r;
        lambda     = lambda_new;
        s          = (lambdau - lambdal) / (lambdau-lambda);
      }
    }
    r = phi(x, n, lambda, a, b, c, l, u);
  }

  *lam_ext = lambda;
  if(innerIter >= df->maxProjIter) {
    ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
  }
  return innerIter;
}
635 
636 
#undef __FUNCT__
#define __FUNCT__ "solve"
/*
  Dai-Fletcher projected-gradient method for the bundle dual QP

      min 0.5*x'*Q*x + f'*x   subj to  a'*x = b,  l <= x <= u

  using Barzilai-Borwein step lengths with an adaptive nonmonotone line
  search.  Terminates when the KKT conditions hold to tolerance df->tol,
  or after df->maxPGMIter iterations.  All workspace comes from TAO_DF.
*/
PetscErrorCode solve(TAO_DF *df)
{
  PetscErrorCode ierr;
  PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
  PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
  PetscReal      DELTAsv, ProdDELTAsv;
  PetscReal      c, *tempQ;
  PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
  PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
  PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
  PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
  PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;

  /*** variables for the adaptive nonmonotone linesearch ***/
  PetscInt    L, llast;
  PetscReal   fr, fbest, fv, fc, fv0;

  c = BMRM_INFTY;

  /* thresholds below which entries are treated as zero in sparse mat-vec products */
  DELTAsv = EPS_SV;
  if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
  else  ProdDELTAsv = EPS_SV;

  for (i = 0; i < dim; i++)  tempv[i] = -x[i];

  lam_ext = 0.0;

  /* Project the initial solution */
  projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);

  /* Compute gradient
     g = Q*x + f; */

  it = 0;
  for (i = 0; i < dim; i++) {
    if (fabs(x[i]) > ProdDELTAsv) ipt[it++] = i;
  }

  /* t accumulates Q*x using only the "nonzero" entries of x collected above */
  ierr = PetscMemzero(t, dim*sizeof(PetscReal)); CHKERRQ(ierr);
  for (i = 0; i < it; i++){
    tempQ = Q[ipt[i]];
    for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
  }
  for (i = 0; i < dim; i++){
    g[i] = t[i] + f[i];
  }


  /* y = -(x_{k} - g_{k}) */
  for (i = 0; i < dim; i++){
    y[i] = g[i] - x[i];
  }

  /* Project x_{k} - g_{k} */
  projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);

  /* y = P(x_{k} - g_{k}) - x_{k} */
  max = ALPHA_MIN;
  for (i = 0; i < dim; i++){
    y[i] = tempv[i] - x[i];
    if (fabs(y[i]) > max) max = fabs(y[i]);
  }

  /* already (numerically) a fixed point of the projected gradient map: done */
  if (max < tol*1e-3){
    lscount = 0;
    innerIter    = 0;
    return 0;
  }

  alpha = 1.0 / max;

  /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
  fv0   = 0.0;
  for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);

  /*** adaptive nonmonotone linesearch ***/
  L     = 2;
  fr    = ALPHA_MAX;
  fbest = fv0;
  fc    = fv0;
  llast = 0;
  akold = bkold = 0.0;

  /***      Iterator begins     ***/
  for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {

    /* tempv = -(x_{k} - alpha*g_{k}) */
    for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];

    /* Project x_{k} - alpha*g_{k} */
    projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);


    /* gd = \inner{d_{k}}{g_{k}}
        d = P(x_{k} - alpha*g_{k}) - x_{k}
    */
    gd = 0.0;
    for (i = 0; i < dim; i++){
      d[i] = y[i] - x[i];
      gd  += d[i] * g[i];
    }

    /* Gradient computation  */

    /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */

    it = it2 = 0;
    for (i = 0; i < dim; i++){
      if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
    }
    for (i = 0; i < dim; i++) {
      if (fabs(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
    }

    ierr = PetscMemzero(Qd, dim*sizeof(PetscReal)); CHKERRQ(ierr);
    /* compute Qd = Q*d */
    if (it < it2){
      for (i = 0; i < it; i++){
        tempQ = Q[ipt[i]];
        for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
      }
    } else { /* compute Qd = Q*y-t */
      for (i = 0; i < it2; i++){
        tempQ = Q[ipt2[i]];
        for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
      }
      for (j = 0; j < dim; j++) Qd[j] -= t[j];
    }

    /* ak = inner{d_{k}}{d_{k}} */
    ak = 0.0;
    for (i = 0; i < dim; i++) ak += d[i] * d[i];

    bk = 0.0;
    for (i = 0; i < dim; i++) bk += d[i]*Qd[i];

    /* exact minimizer along d when curvature is positive and d is a descent direction */
    if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
    else lamnew = 1.0;

    /* fv is computing f(x_{k} + d_{k}) */
    fv = 0.0;
    for (i = 0; i < dim; i++){
      xplus[i] = x[i] + d[i];
      tplus[i] = t[i] + Qd[i];
      fv      += xplus[i] * (0.5*tplus[i] + f[i]);
    }

    /* fr is fref */
    /* full step rejected by the nonmonotone test: back off to step length lamnew */
    if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
      lscount++;
      fv = 0.0;
      for (i = 0; i < dim; i++){
        xplus[i] = x[i] + lamnew*d[i];
        tplus[i] = t[i] + lamnew*Qd[i];
        fv      += xplus[i] * (0.5*tplus[i] + f[i]);
      }
    }

    /* accept the step; record s_k, y_k for the Barzilai-Borwein step length */
    for (i = 0; i < dim; i++){
      sk[i] = xplus[i] - x[i];
      yk[i] = tplus[i] - t[i];
      x[i]  = xplus[i];
      t[i]  = tplus[i];
      g[i]  = t[i] + f[i];
    }

    /* update the line search control parameters */
    if (fv < fbest){
      fbest = fv;
      fc    = fv;
      llast = 0;
    } else {
      fc = (fc > fv ? fc : fv);
      llast++;
      if (llast == L){
        fr    = fc;
        fc    = fv;
        llast = 0;
      }
    }

    /* Barzilai-Borwein scalars: ak = s'.s, bk = s'.y */
    ak = bk = 0.0;
    for (i = 0; i < dim; i++){
      ak += sk[i] * sk[i];
      bk += sk[i] * yk[i];
    }

    if (bk <= EPS*ak) alpha = ALPHA_MAX;
    else {
      if (bkold < EPS*akold) alpha = ak/bk;
      else alpha = (akold+ak)/(bkold+bk);

      if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
      else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
    }

    akold = ak;
    bkold = bk;

    /*** stopping criterion based on KKT conditions ***/
    /* at optimal, gradient of lagrangian w.r.t. x is zero */

    bk = 0.0;
    for (i = 0; i < dim; i++) bk +=  x[i] * x[i];

    /* step small relative to ||x||: examine the KKT conditions in detail */
    if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
      it     = 0;
      luv    = 0;
      kktlam = 0.0;
      for (i = 0; i < dim; i++){
        /* x[i] is active hence lagrange multipliers for box constraints
                are zero. The lagrange multiplier for ineq. const. is then
                defined as below
        */
        if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
          ipt[it++] = i;
          kktlam    = kktlam - a[i]*g[i];
        } else  uv[luv++] = i;
      }

      if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
      else {
        kktlam = kktlam/it;
        info   = 1;
        /* interior components must share the same multiplier estimate */
        for (i = 0; i < it; i++) {
          if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
            info = 0;
            break;
          }
        }
        if (info == 1)  {
          for (i = 0; i < luv; i++)  {
            if (x[uv[i]] <= DELTAsv){
              /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
                     not be zero. So, the gradient without beta is > 0
              */
              if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
                info = 0;
                break;
              }
            } else {
              /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
                     not be zero. So, the gradient without eta is < 0
              */
              if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
                info = 0;
                break;
              }
            }
          }
        }

        if (info == 1) return 0;
      }
    }
  }
  return 0;
}
897 
898 
899