xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision 94ae4db594e046293ec15c24a9f6581a7ec3c4e6)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
18 */
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "make_grad_node"
22 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = PetscNew(p);CHKERRQ(ierr);
28   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
29   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
30   (*p)->next = NULL;
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "destroy_grad_list"
36 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
37 {
38   PetscErrorCode ierr;
39   Vec_Chain      *p = head->next, *q;
40 
41   PetscFunctionBegin;
42   while(p) {
43     q = p->next;
44     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
45     ierr = PetscFree(p);CHKERRQ(ierr);
46     p = q;
47   }
48   head->next = NULL;
49   PetscFunctionReturn(0);
50 }
51 
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "TaoSolve_BMRM"
55 static PetscErrorCode TaoSolve_BMRM(Tao tao)
56 {
57   PetscErrorCode     ierr;
58   TaoConvergedReason reason;
59   TAO_DF             df;
60   TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;
61 
62   /* Values and pointers to parts of the optimization problem */
63   PetscReal          f = 0.0;
64   Vec                W = tao->solution;
65   Vec                G = tao->gradient;
66   PetscReal          lambda;
67   PetscReal          bt;
68   Vec_Chain          grad_list, *tail_glist, *pgrad;
69   PetscInt           iter = 0;
70   PetscInt           i;
71   PetscMPIInt        rank;
72 
73   /* Used in converged criteria check */
74   PetscReal          reg;
75   PetscReal          jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
76   PetscReal          innerSolverTol;
77   MPI_Comm           comm;
78 
79   PetscFunctionBegin;
80   ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
81   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
82   lambda = bmrm->lambda;
83 
84   /* Check Stopping Condition */
85   tao->step = 1.0;
86   max_jtwt = -BMRM_INFTY;
87   min_jw = BMRM_INFTY;
88   innerSolverTol = 1.0;
89   epsilon = 0.0;
90 
91   if (!rank) {
92     ierr = init_df_solver(&df);CHKERRQ(ierr);
93     grad_list.next = NULL;
94     tail_glist = &grad_list;
95   }
96 
97   df.tol = 1e-6;
98   reason = TAO_CONTINUE_ITERATING;
99 
100   /*-----------------Algorithm Begins------------------------*/
101   /* make the scatter */
102   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
103   ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
104   ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);
105 
106   /* NOTE: In application pass the sub-gradient of Remp(W) */
107   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
108   ierr = TaoMonitor(tao,iter,f,1.0,0.0,tao->step,&reason);CHKERRQ(ierr);
109   while (reason == TAO_CONTINUE_ITERATING) {
110     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
111     ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
112     bt = f - bt;
113 
114     /* First gather the gradient to the master node */
115     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
116     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
117 
118     /* Bring up the inner solver */
119     if (!rank) {
120       ierr = ensure_df_space(iter+1, &df); CHKERRQ(ierr);
121       ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
122       tail_glist->next = pgrad;
123       tail_glist = pgrad;
124 
125       df.a[iter] = 1.0;
126       df.f[iter] = -bt;
127       df.u[iter] = 1.0;
128       df.l[iter] = 0.0;
129 
130       /* set up the Q */
131       pgrad = grad_list.next;
132       for (i=0; i<=iter; i++) {
133         ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
134         df.Q[i][iter] = df.Q[iter][i] = reg / lambda;
135         pgrad = pgrad->next;
136       }
137 
138       if (iter > 0) {
139         df.x[iter] = 0.0;
140         ierr = solve(&df); CHKERRQ(ierr);
141       } else
142         df.x[0] = 1.0;
143 
144       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
145       jtwt = 0.0;
146       ierr = VecSet(bmrm->local_w, 0.0); CHKERRQ(ierr);
147       pgrad = grad_list.next;
148       for (i=0; i<=iter; i++) {
149         jtwt -= df.x[i] * df.f[i];
150         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V); CHKERRQ(ierr);
151         pgrad = pgrad->next;
152       }
153 
154       ierr = VecNorm(bmrm->local_w, NORM_2, &reg); CHKERRQ(ierr);
155       reg = 0.5*lambda*reg*reg;
156       jtwt -= reg;
157     } /* end if rank == 0 */
158 
159     /* scatter the new W to all nodes */
160     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
161     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
162 
163     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
164 
165     ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
166     ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
167 
168     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
169     if (jw < min_jw) min_jw = jw;
170     if (jtwt > max_jtwt) max_jtwt = jtwt;
171 
172     pre_epsilon = epsilon;
173     epsilon = min_jw - jtwt;
174 
175     if (!rank) {
176       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
177       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
178 
179       /* if the annealing doesn't work well, lower the inner solver tolerance */
180       if(pre_epsilon < epsilon) innerSolverTol *= 0.2;
181 
182       df.tol = innerSolverTol*0.5;
183     }
184 
185     iter++;
186     ierr = TaoMonitor(tao,iter,min_jw,epsilon,0.0,tao->step,&reason);CHKERRQ(ierr);
187   }
188 
189   /* free all the memory */
190   if (!rank) {
191     ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
192     ierr = destroy_df_solver(&df);CHKERRQ(ierr);
193   }
194 
195   ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
196   ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
197   PetscFunctionReturn(0);
198 }
199 
200 
201 /* ---------------------------------------------------------- */
202 
203 #undef __FUNCT__
204 #define __FUNCT__ "TaoSetup_BMRM"
205 static PetscErrorCode TaoSetup_BMRM(Tao tao)
206 {
207 
208   PetscErrorCode ierr;
209 
210   PetscFunctionBegin;
211   /* Allocate some arrays */
212   if (!tao->gradient) {
213     ierr = VecDuplicate(tao->solution, &tao->gradient);   CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 /*------------------------------------------------------------*/
219 #undef __FUNCT__
220 #define __FUNCT__ "TaoDestroy_BMRM"
221 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
222 {
223   PetscErrorCode ierr;
224 
225   PetscFunctionBegin;
226   ierr = PetscFree(tao->data);CHKERRQ(ierr);
227   PetscFunctionReturn(0);
228 }
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "TaoSetFromOptions_BMRM"
232 static PetscErrorCode TaoSetFromOptions_BMRM(Tao tao)
233 {
234   PetscErrorCode ierr;
235   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
236 
237   PetscFunctionBegin;
238   ierr = PetscOptionsHead("BMRM for regularized risk minimization");CHKERRQ(ierr);
239   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,NULL); CHKERRQ(ierr);
240   ierr = PetscOptionsTail();CHKERRQ(ierr);
241   PetscFunctionReturn(0);
242 }
243 
244 /*------------------------------------------------------------*/
245 #undef __FUNCT__
246 #define __FUNCT__ "TaoView_BMRM"
247 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
248 {
249   PetscBool      isascii;
250   PetscErrorCode ierr;
251 
252   PetscFunctionBegin;
253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
254   if (isascii) {
255     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
256     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
257   }
258   PetscFunctionReturn(0);
259 }
260 
261 /*------------------------------------------------------------*/
262 /*MC
263   TAOBMRM - bundle method for regularized risk minimization
264 
265   Options Database Keys:
266 . - tao_bmrm_lambda - regulariser weight
267 
268   Level: beginner
269 M*/
270 
271 EXTERN_C_BEGIN
272 #undef __FUNCT__
273 #define __FUNCT__ "TaoCreate_BMRM"
274 PetscErrorCode TaoCreate_BMRM(Tao tao)
275 {
276   TAO_BMRM       *bmrm;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   tao->ops->setup = TaoSetup_BMRM;
281   tao->ops->solve = TaoSolve_BMRM;
282   tao->ops->view  = TaoView_BMRM;
283   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
284   tao->ops->destroy = TaoDestroy_BMRM;
285 
286   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
287   bmrm->lambda = 1.0;
288   tao->data = (void*)bmrm;
289 
290   /* Note: May need to be tuned! */
291   tao->max_it = 2048;
292   tao->max_funcs = 300000;
293   tao->fatol = 1e-20;
294   tao->frtol = 1e-25;
295   tao->gatol = 1e-25;
296   tao->grtol = 1e-25;
297 
298   PetscFunctionReturn(0);
299 }
300 EXTERN_C_END
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "init_df_solver"
304 PetscErrorCode init_df_solver(TAO_DF *df)
305 {
306   PetscInt       i, n = INCRE_DIM;
307   PetscErrorCode ierr;
308 
309   PetscFunctionBegin;
310   /* default values */
311   df->maxProjIter = 200;
312   df->maxPGMIter = 300000;
313   df->b = 1.0;
314 
315   /* memory space required by Dai-Fletcher */
316   df->cur_num_cp = n;
317   ierr = PetscMalloc1(n, &df->f); CHKERRQ(ierr);
318   ierr = PetscMalloc1(n, &df->a); CHKERRQ(ierr);
319   ierr = PetscMalloc1(n, &df->l); CHKERRQ(ierr);
320   ierr = PetscMalloc1(n, &df->u); CHKERRQ(ierr);
321   ierr = PetscMalloc1(n, &df->x); CHKERRQ(ierr);
322   ierr = PetscMalloc1(n, &df->Q); CHKERRQ(ierr);
323 
324   for (i = 0; i < n; i ++) {
325     ierr = PetscMalloc1(n, &df->Q[i]); CHKERRQ(ierr);
326   }
327 
328   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
329   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
330   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
331   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
332   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
333   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
334   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
335   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
336   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
337   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
338 
339   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
340   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
341   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 #undef __FUNCT__
346 #define __FUNCT__ "ensure_df_space"
347 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
348 {
349   PetscErrorCode ierr;
350   PetscReal      *tmp, **tmp_Q;
351   PetscInt       i, n, old_n;
352 
353   PetscFunctionBegin;
354   df->dim = dim;
355   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
356 
357   old_n = df->cur_num_cp;
358   df->cur_num_cp += INCRE_DIM;
359   n = df->cur_num_cp;
360 
361   /* memory space required by dai-fletcher */
362   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
363   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
364   ierr = PetscFree(df->f); CHKERRQ(ierr);
365   df->f = tmp;
366 
367   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
368   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
369   ierr = PetscFree(df->a); CHKERRQ(ierr);
370   df->a = tmp;
371 
372   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
373   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
374   ierr = PetscFree(df->l); CHKERRQ(ierr);
375   df->l = tmp;
376 
377   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
378   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
379   ierr = PetscFree(df->u); CHKERRQ(ierr);
380   df->u = tmp;
381 
382   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
383   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
384   ierr = PetscFree(df->x); CHKERRQ(ierr);
385   df->x = tmp;
386 
387   ierr = PetscMalloc1(n, &tmp_Q); CHKERRQ(ierr);
388   for (i = 0; i < n; i ++) {
389     ierr = PetscMalloc1(n, &tmp_Q[i]); CHKERRQ(ierr);
390     if (i < old_n) {
391       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n); CHKERRQ(ierr);
392       ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
393     }
394   }
395 
396   ierr = PetscFree(df->Q); CHKERRQ(ierr);
397   df->Q = tmp_Q;
398 
399   ierr = PetscFree(df->g); CHKERRQ(ierr);
400   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
401 
402   ierr = PetscFree(df->y); CHKERRQ(ierr);
403   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
404 
405   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
406   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
407 
408   ierr = PetscFree(df->d); CHKERRQ(ierr);
409   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
410 
411   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
412   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
413 
414   ierr = PetscFree(df->t); CHKERRQ(ierr);
415   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
416 
417   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
418   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
419 
420   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
421   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
422 
423   ierr = PetscFree(df->sk); CHKERRQ(ierr);
424   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
425 
426   ierr = PetscFree(df->yk); CHKERRQ(ierr);
427   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
428 
429   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
430   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
431 
432   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
433   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
434 
435   ierr = PetscFree(df->uv); CHKERRQ(ierr);
436   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
437   PetscFunctionReturn(0);
438 }
439 
440 #undef __FUNCT__
441 #define __FUNCT__ "destroy_df_solver"
442 PetscErrorCode destroy_df_solver(TAO_DF *df)
443 {
444   PetscErrorCode ierr;
445   PetscInt       i;
446 
447   PetscFunctionBegin;
448   ierr = PetscFree(df->f); CHKERRQ(ierr);
449   ierr = PetscFree(df->a); CHKERRQ(ierr);
450   ierr = PetscFree(df->l); CHKERRQ(ierr);
451   ierr = PetscFree(df->u); CHKERRQ(ierr);
452   ierr = PetscFree(df->x); CHKERRQ(ierr);
453 
454   for (i = 0; i < df->cur_num_cp; i ++) {
455     ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
456   }
457   ierr = PetscFree(df->Q); CHKERRQ(ierr);
458   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
459   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
460   ierr = PetscFree(df->uv); CHKERRQ(ierr);
461   ierr = PetscFree(df->g); CHKERRQ(ierr);
462   ierr = PetscFree(df->y); CHKERRQ(ierr);
463   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
464   ierr = PetscFree(df->d); CHKERRQ(ierr);
465   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
466   ierr = PetscFree(df->t); CHKERRQ(ierr);
467   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
468   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
469   ierr = PetscFree(df->sk); CHKERRQ(ierr);
470   ierr = PetscFree(df->yk); CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
475 #undef __FUNCT__
476 #define __FUNCT__ "phi"
477 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
478 {
479   PetscReal r = 0.0;
480   PetscInt  i;
481 
482   for (i = 0; i < n; i++){
483     x[i] = -c[i] + lambda*a[i];
484     if (x[i] > u[i])     x[i] = u[i];
485     else if(x[i] < l[i]) x[i] = l[i];
486     r += a[i]*x[i];
487   }
488   return r - b;
489 }
490 
491 /** Modified Dai-Fletcher QP projector solves the problem:
492  *
493  *      minimise  0.5*x'*x - c'*x
494  *      subj to   a'*x = b
495  *                l \leq x \leq u
496  *
497  *  \param c The point to be projected onto feasible set
498  */
499 #undef __FUNCT__
500 #define __FUNCT__ "project"
501 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
502 {
503   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
504   PetscReal      r, rl, ru, s;
505   PetscInt       innerIter;
506   PetscBool      nonNegativeSlack = PETSC_FALSE;
507   PetscErrorCode ierr;
508 
509   *lam_ext = 0;
510   lambda  = 0;
511   dlambda = 0.5;
512   innerIter = 1;
513 
514   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
515    *
516    *  Optimality conditions for \phi:
517    *
518    *  1. lambda   <= 0
519    *  2. r        <= 0
520    *  3. r*lambda == 0
521    */
522 
523   /* Bracketing Phase */
524   r = phi(x, n, lambda, a, b, c, l, u);
525 
526   if(nonNegativeSlack) {
527     /* inequality constraint, i.e., with \xi >= 0 constraint */
528     if (r < TOL_R) return 0;
529   } else  {
530     /* equality constraint ,i.e., without \xi >= 0 constraint */
531     if (fabs(r) < TOL_R) return 0;
532   }
533 
534   if (r < 0.0){
535     lambdal = lambda;
536     rl      = r;
537     lambda  = lambda + dlambda;
538     r       = phi(x, n, lambda, a, b, c, l, u);
539     while (r < 0.0 && dlambda < BMRM_INFTY)  {
540       lambdal = lambda;
541       s       = rl/r - 1.0;
542       if (s < 0.1) s = 0.1;
543       dlambda = dlambda + dlambda/s;
544       lambda  = lambda + dlambda;
545       rl      = r;
546       r       = phi(x, n, lambda, a, b, c, l, u);
547     }
548     lambdau = lambda;
549     ru      = r;
550   } else {
551     lambdau = lambda;
552     ru      = r;
553     lambda  = lambda - dlambda;
554     r       = phi(x, n, lambda, a, b, c, l, u);
555     while (r > 0.0 && dlambda > -BMRM_INFTY) {
556       lambdau = lambda;
557       s       = ru/r - 1.0;
558       if (s < 0.1) s = 0.1;
559       dlambda = dlambda + dlambda/s;
560       lambda  = lambda - dlambda;
561       ru      = r;
562       r       = phi(x, n, lambda, a, b, c, l, u);
563     }
564     lambdal = lambda;
565     rl      = r;
566   }
567 
568   if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
569 
570   if(ru == 0){
571     lambda = lambdau;
572     r = phi(x, n, lambda, a, b, c, l, u);
573     return innerIter;
574   }
575 
576   /* Secant Phase */
577   s       = 1.0 - rl/ru;
578   dlambda = dlambda/s;
579   lambda  = lambdau - dlambda;
580   r       = phi(x, n, lambda, a, b, c, l, u);
581 
582   while (fabs(r) > TOL_R
583          && dlambda > TOL_LAM * (1.0 + fabs(lambda))
584          && innerIter < df->maxProjIter){
585     innerIter++;
586     if (r > 0.0){
587       if (s <= 2.0){
588         lambdau = lambda;
589         ru      = r;
590         s       = 1.0 - rl/ru;
591         dlambda = (lambdau - lambdal) / s;
592         lambda  = lambdau - dlambda;
593       } else {
594         s          = ru/r-1.0;
595         if (s < 0.1) s = 0.1;
596         dlambda    = (lambdau - lambda) / s;
597         lambda_new = 0.75*lambdal + 0.25*lambda;
598         if (lambda_new < (lambda - dlambda))
599           lambda_new = lambda - dlambda;
600         lambdau    = lambda;
601         ru         = r;
602         lambda     = lambda_new;
603         s          = (lambdau - lambdal) / (lambdau - lambda);
604       }
605     } else {
606       if (s >= 2.0){
607         lambdal = lambda;
608         rl      = r;
609         s       = 1.0 - rl/ru;
610         dlambda = (lambdau - lambdal) / s;
611         lambda  = lambdau - dlambda;
612       } else {
613         s          = rl/r - 1.0;
614         if (s < 0.1) s = 0.1;
615         dlambda    = (lambda-lambdal) / s;
616         lambda_new = 0.75*lambdau + 0.25*lambda;
617         if (lambda_new > (lambda + dlambda))
618           lambda_new = lambda + dlambda;
619         lambdal    = lambda;
620         rl         = r;
621         lambda     = lambda_new;
622         s          = (lambdau - lambdal) / (lambdau-lambda);
623       }
624     }
625     r = phi(x, n, lambda, a, b, c, l, u);
626   }
627 
628   *lam_ext = lambda;
629   if(innerIter >= df->maxProjIter) {
630     ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
631   }
632   return innerIter;
633 }
634 
635 
636 #undef __FUNCT__
637 #define __FUNCT__ "solve"
638 PetscErrorCode solve(TAO_DF *df)
639 {
640   PetscErrorCode ierr;
641   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
642   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
643   PetscReal      DELTAsv, ProdDELTAsv;
644   PetscReal      c, *tempQ;
645   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
646   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
647   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
648   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
649   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
650 
651   /*** variables for the adaptive nonmonotone linesearch ***/
652   PetscInt    L, llast;
653   PetscReal   fr, fbest, fv, fc, fv0;
654 
655   c = BMRM_INFTY;
656 
657   DELTAsv = EPS_SV;
658   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
659   else  ProdDELTAsv = EPS_SV;
660 
661   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
662 
663   lam_ext = 0.0;
664 
665   /* Project the initial solution */
666   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
667 
668   /* Compute gradient
669      g = Q*x + f; */
670 
671   it = 0;
672   for (i = 0; i < dim; i++) {
673     if (fabs(x[i]) > ProdDELTAsv) ipt[it++] = i;
674   }
675 
676   ierr = PetscMemzero(t, dim*sizeof(PetscReal)); CHKERRQ(ierr);
677   for (i = 0; i < it; i++){
678     tempQ = Q[ipt[i]];
679     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
680   }
681   for (i = 0; i < dim; i++){
682     g[i] = t[i] + f[i];
683   }
684 
685 
686   /* y = -(x_{k} - g_{k}) */
687   for (i = 0; i < dim; i++){
688     y[i] = g[i] - x[i];
689   }
690 
691   /* Project x_{k} - g_{k} */
692   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
693 
694   /* y = P(x_{k} - g_{k}) - x_{k} */
695   max = ALPHA_MIN;
696   for (i = 0; i < dim; i++){
697     y[i] = tempv[i] - x[i];
698     if (fabs(y[i]) > max) max = fabs(y[i]);
699   }
700 
701   if (max < tol*1e-3){
702     lscount = 0;
703     innerIter    = 0;
704     return 0;
705   }
706 
707   alpha = 1.0 / max;
708 
709   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
710   fv0   = 0.0;
711   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
712 
713   /*** adaptive nonmonotone linesearch ***/
714   L     = 2;
715   fr    = ALPHA_MAX;
716   fbest = fv0;
717   fc    = fv0;
718   llast = 0;
719   akold = bkold = 0.0;
720 
721   /***      Iterator begins     ***/
722   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
723 
724     /* tempv = -(x_{k} - alpha*g_{k}) */
725     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
726 
727     /* Project x_{k} - alpha*g_{k} */
728     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
729 
730 
731     /* gd = \inner{d_{k}}{g_{k}}
732         d = P(x_{k} - alpha*g_{k}) - x_{k}
733     */
734     gd = 0.0;
735     for (i = 0; i < dim; i++){
736       d[i] = y[i] - x[i];
737       gd  += d[i] * g[i];
738     }
739 
740     /* Gradient computation  */
741 
742     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
743 
744     it = it2 = 0;
745     for (i = 0; i < dim; i++){
746       if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
747     }
748     for (i = 0; i < dim; i++) {
749       if (fabs(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
750     }
751 
752     ierr = PetscMemzero(Qd, dim*sizeof(PetscReal)); CHKERRQ(ierr);
753     /* compute Qd = Q*d */
754     if (it < it2){
755       for (i = 0; i < it; i++){
756         tempQ = Q[ipt[i]];
757         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
758       }
759     } else { /* compute Qd = Q*y-t */
760       for (i = 0; i < it2; i++){
761         tempQ = Q[ipt2[i]];
762         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
763       }
764       for (j = 0; j < dim; j++) Qd[j] -= t[j];
765     }
766 
767     /* ak = inner{d_{k}}{d_{k}} */
768     ak = 0.0;
769     for (i = 0; i < dim; i++) ak += d[i] * d[i];
770 
771     bk = 0.0;
772     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
773 
774     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
775     else lamnew = 1.0;
776 
777     /* fv is computing f(x_{k} + d_{k}) */
778     fv = 0.0;
779     for (i = 0; i < dim; i++){
780       xplus[i] = x[i] + d[i];
781       tplus[i] = t[i] + Qd[i];
782       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
783     }
784 
785     /* fr is fref */
786     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
787       lscount++;
788       fv = 0.0;
789       for (i = 0; i < dim; i++){
790         xplus[i] = x[i] + lamnew*d[i];
791         tplus[i] = t[i] + lamnew*Qd[i];
792         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
793       }
794     }
795 
796     for (i = 0; i < dim; i++){
797       sk[i] = xplus[i] - x[i];
798       yk[i] = tplus[i] - t[i];
799       x[i]  = xplus[i];
800       t[i]  = tplus[i];
801       g[i]  = t[i] + f[i];
802     }
803 
804     /* update the line search control parameters */
805     if (fv < fbest){
806       fbest = fv;
807       fc    = fv;
808       llast = 0;
809     } else {
810       fc = (fc > fv ? fc : fv);
811       llast++;
812       if (llast == L){
813         fr    = fc;
814         fc    = fv;
815         llast = 0;
816       }
817     }
818 
819     ak = bk = 0.0;
820     for (i = 0; i < dim; i++){
821       ak += sk[i] * sk[i];
822       bk += sk[i] * yk[i];
823     }
824 
825     if (bk <= EPS*ak) alpha = ALPHA_MAX;
826     else {
827       if (bkold < EPS*akold) alpha = ak/bk;
828       else alpha = (akold+ak)/(bkold+bk);
829 
830       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
831       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
832     }
833 
834     akold = ak;
835     bkold = bk;
836 
837     /*** stopping criterion based on KKT conditions ***/
838     /* at optimal, gradient of lagrangian w.r.t. x is zero */
839 
840     bk = 0.0;
841     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
842 
843     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
844       it     = 0;
845       luv    = 0;
846       kktlam = 0.0;
847       for (i = 0; i < dim; i++){
848         /* x[i] is active hence lagrange multipliers for box constraints
849                 are zero. The lagrange multiplier for ineq. const. is then
850                 defined as below
851         */
852         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
853           ipt[it++] = i;
854           kktlam    = kktlam - a[i]*g[i];
855         } else  uv[luv++] = i;
856       }
857 
858       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
859       else {
860         kktlam = kktlam/it;
861         info   = 1;
862         for (i = 0; i < it; i++) {
863           if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
864             info = 0;
865             break;
866           }
867         }
868         if (info == 1)  {
869           for (i = 0; i < luv; i++)  {
870             if (x[uv[i]] <= DELTAsv){
871               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
872                      not be zero. So, the gradient without beta is > 0
873               */
874               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
875                 info = 0;
876                 break;
877               }
878             } else {
879               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
880                      not be zero. So, the gradient without eta is < 0
881               */
882               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
883                 info = 0;
884                 break;
885               }
886             }
887           }
888         }
889 
890         if (info == 1) return 0;
891       }
892     }
893   }
894   return 0;
895 }
896 
897 
898