xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision e7f46db8d62cb2e4e59111bf21061e64ea60daab)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
18 */
19 
20 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
21 {
22   PetscErrorCode ierr;
23 
24   PetscFunctionBegin;
25   ierr = PetscNew(p);CHKERRQ(ierr);
26   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
27   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
28   (*p)->next = NULL;
29   PetscFunctionReturn(0);
30 }
31 
32 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
33 {
34   PetscErrorCode ierr;
35   Vec_Chain      *p = head->next, *q;
36 
37   PetscFunctionBegin;
38   while(p) {
39     q = p->next;
40     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
41     ierr = PetscFree(p);CHKERRQ(ierr);
42     p = q;
43   }
44   head->next = NULL;
45   PetscFunctionReturn(0);
46 }
47 
48 
49 static PetscErrorCode TaoSolve_BMRM(Tao tao)
50 {
51   PetscErrorCode     ierr;
52   TAO_DF             df;
53   TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;
54 
55   /* Values and pointers to parts of the optimization problem */
56   PetscReal          f = 0.0;
57   Vec                W = tao->solution;
58   Vec                G = tao->gradient;
59   PetscReal          lambda;
60   PetscReal          bt;
61   Vec_Chain          grad_list, *tail_glist, *pgrad;
62   PetscInt           i;
63   PetscMPIInt        rank;
64 
65   /* Used in converged criteria check */
66   PetscReal          reg;
67   PetscReal          jtwt = 0.0, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
68   PetscReal          innerSolverTol;
69   MPI_Comm           comm;
70 
71   PetscFunctionBegin;
72   ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
73   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
74   lambda = bmrm->lambda;
75 
76   /* Check Stopping Condition */
77   tao->step = 1.0;
78   max_jtwt = -BMRM_INFTY;
79   min_jw = BMRM_INFTY;
80   innerSolverTol = 1.0;
81   epsilon = 0.0;
82 
83   if (!rank) {
84     ierr = init_df_solver(&df);CHKERRQ(ierr);
85     grad_list.next = NULL;
86     tail_glist = &grad_list;
87   }
88 
89   df.tol = 1e-6;
90   tao->reason = TAO_CONTINUE_ITERATING;
91 
92   /*-----------------Algorithm Begins------------------------*/
93   /* make the scatter */
94   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
95   ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
96   ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);
97 
98   /* NOTE: In application pass the sub-gradient of Remp(W) */
99   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
100   ierr = TaoLogConvergenceHistory(tao,f,1.0,0.0,tao->ksp_its);CHKERRQ(ierr);
101   ierr = TaoMonitor(tao,tao->niter,f,1.0,0.0,tao->step);CHKERRQ(ierr);
102   ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
103 
104   while (tao->reason == TAO_CONTINUE_ITERATING) {
105     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
106     ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
107     bt = f - bt;
108 
109     /* First gather the gradient to the master node */
110     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
111     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
112 
113     /* Bring up the inner solver */
114     if (!rank) {
115       ierr = ensure_df_space(tao->niter+1, &df);CHKERRQ(ierr);
116       ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
117       tail_glist->next = pgrad;
118       tail_glist = pgrad;
119 
120       df.a[tao->niter] = 1.0;
121       df.f[tao->niter] = -bt;
122       df.u[tao->niter] = 1.0;
123       df.l[tao->niter] = 0.0;
124 
125       /* set up the Q */
126       pgrad = grad_list.next;
127       for (i=0; i<=tao->niter; i++) {
128         if (!pgrad) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Assert that there are at least tao->niter+1 pgrad available");
129         ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
130         df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
131         pgrad = pgrad->next;
132       }
133 
134       if (tao->niter > 0) {
135         df.x[tao->niter] = 0.0;
136         ierr = solve(&df);CHKERRQ(ierr);
137       } else
138         df.x[0] = 1.0;
139 
140       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
141       jtwt = 0.0;
142       ierr = VecSet(bmrm->local_w, 0.0);CHKERRQ(ierr);
143       pgrad = grad_list.next;
144       for (i=0; i<=tao->niter; i++) {
145         jtwt -= df.x[i] * df.f[i];
146         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V);CHKERRQ(ierr);
147         pgrad = pgrad->next;
148       }
149 
150       ierr = VecNorm(bmrm->local_w, NORM_2, &reg);CHKERRQ(ierr);
151       reg = 0.5*lambda*reg*reg;
152       jtwt -= reg;
153     } /* end if rank == 0 */
154 
155     /* scatter the new W to all nodes */
156     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
157     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
158 
159     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
160 
161     ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
162     ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
163 
164     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
165     if (jw < min_jw) min_jw = jw;
166     if (jtwt > max_jtwt) max_jtwt = jtwt;
167 
168     pre_epsilon = epsilon;
169     epsilon = min_jw - jtwt;
170 
171     if (!rank) {
172       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
173       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
174 
175       /* if the annealing doesn't work well, lower the inner solver tolerance */
176       if(pre_epsilon < epsilon) innerSolverTol *= 0.2;
177 
178       df.tol = innerSolverTol*0.5;
179     }
180 
181     tao->niter++;
182     ierr = TaoLogConvergenceHistory(tao,min_jw,epsilon,0.0,tao->ksp_its);CHKERRQ(ierr);
183     ierr = TaoMonitor(tao,tao->niter,min_jw,epsilon,0.0,tao->step);CHKERRQ(ierr);
184     ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
185   }
186 
187   /* free all the memory */
188   if (!rank) {
189     ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
190     ierr = destroy_df_solver(&df);CHKERRQ(ierr);
191   }
192 
193   ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
194   ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
195   PetscFunctionReturn(0);
196 }
197 
198 
199 /* ---------------------------------------------------------- */
200 
201 static PetscErrorCode TaoSetup_BMRM(Tao tao)
202 {
203 
204   PetscErrorCode ierr;
205 
206   PetscFunctionBegin;
207   /* Allocate some arrays */
208   if (!tao->gradient) {
209     ierr = VecDuplicate(tao->solution, &tao->gradient);CHKERRQ(ierr);
210   }
211   PetscFunctionReturn(0);
212 }
213 
214 /*------------------------------------------------------------*/
215 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
216 {
217   PetscErrorCode ierr;
218 
219   PetscFunctionBegin;
220   ierr = PetscFree(tao->data);CHKERRQ(ierr);
221   PetscFunctionReturn(0);
222 }
223 
224 static PetscErrorCode TaoSetFromOptions_BMRM(PetscOptionItems *PetscOptionsObject,Tao tao)
225 {
226   PetscErrorCode ierr;
227   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
228 
229   PetscFunctionBegin;
230   ierr = PetscOptionsHead(PetscOptionsObject,"BMRM for regularized risk minimization");CHKERRQ(ierr);
231   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,NULL);CHKERRQ(ierr);
232   ierr = PetscOptionsTail();CHKERRQ(ierr);
233   PetscFunctionReturn(0);
234 }
235 
236 /*------------------------------------------------------------*/
237 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
238 {
239   PetscBool      isascii;
240   PetscErrorCode ierr;
241 
242   PetscFunctionBegin;
243   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
244   if (isascii) {
245     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
246     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
247   }
248   PetscFunctionReturn(0);
249 }
250 
251 /*------------------------------------------------------------*/
252 /*MC
253   TAOBMRM - bundle method for regularized risk minimization
254 
255   Options Database Keys:
256 . - tao_bmrm_lambda - regulariser weight
257 
258   Level: beginner
259 M*/
260 
261 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
262 {
263   TAO_BMRM       *bmrm;
264   PetscErrorCode ierr;
265 
266   PetscFunctionBegin;
267   tao->ops->setup = TaoSetup_BMRM;
268   tao->ops->solve = TaoSolve_BMRM;
269   tao->ops->view  = TaoView_BMRM;
270   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
271   tao->ops->destroy = TaoDestroy_BMRM;
272 
273   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
274   bmrm->lambda = 1.0;
275   tao->data = (void*)bmrm;
276 
277   /* Override default settings (unless already changed) */
278   if (!tao->max_it_changed) tao->max_it = 2000;
279   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
280   if (!tao->gatol_changed) tao->gatol = 1.0e-12;
281   if (!tao->grtol_changed) tao->grtol = 1.0e-12;
282 
283   PetscFunctionReturn(0);
284 }
285 
286 PetscErrorCode init_df_solver(TAO_DF *df)
287 {
288   PetscInt       i, n = INCRE_DIM;
289   PetscErrorCode ierr;
290 
291   PetscFunctionBegin;
292   /* default values */
293   df->maxProjIter = 200;
294   df->maxPGMIter = 300000;
295   df->b = 1.0;
296 
297   /* memory space required by Dai-Fletcher */
298   df->cur_num_cp = n;
299   ierr = PetscMalloc1(n, &df->f);CHKERRQ(ierr);
300   ierr = PetscMalloc1(n, &df->a);CHKERRQ(ierr);
301   ierr = PetscMalloc1(n, &df->l);CHKERRQ(ierr);
302   ierr = PetscMalloc1(n, &df->u);CHKERRQ(ierr);
303   ierr = PetscMalloc1(n, &df->x);CHKERRQ(ierr);
304   ierr = PetscMalloc1(n, &df->Q);CHKERRQ(ierr);
305 
306   for (i = 0; i < n; i ++) {
307     ierr = PetscMalloc1(n, &df->Q[i]);CHKERRQ(ierr);
308   }
309 
310   ierr = PetscMalloc1(n, &df->g);CHKERRQ(ierr);
311   ierr = PetscMalloc1(n, &df->y);CHKERRQ(ierr);
312   ierr = PetscMalloc1(n, &df->tempv);CHKERRQ(ierr);
313   ierr = PetscMalloc1(n, &df->d);CHKERRQ(ierr);
314   ierr = PetscMalloc1(n, &df->Qd);CHKERRQ(ierr);
315   ierr = PetscMalloc1(n, &df->t);CHKERRQ(ierr);
316   ierr = PetscMalloc1(n, &df->xplus);CHKERRQ(ierr);
317   ierr = PetscMalloc1(n, &df->tplus);CHKERRQ(ierr);
318   ierr = PetscMalloc1(n, &df->sk);CHKERRQ(ierr);
319   ierr = PetscMalloc1(n, &df->yk);CHKERRQ(ierr);
320 
321   ierr = PetscMalloc1(n, &df->ipt);CHKERRQ(ierr);
322   ierr = PetscMalloc1(n, &df->ipt2);CHKERRQ(ierr);
323   ierr = PetscMalloc1(n, &df->uv);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
328 {
329   PetscErrorCode ierr;
330   PetscReal      *tmp, **tmp_Q;
331   PetscInt       i, n, old_n;
332 
333   PetscFunctionBegin;
334   df->dim = dim;
335   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
336 
337   old_n = df->cur_num_cp;
338   df->cur_num_cp += INCRE_DIM;
339   n = df->cur_num_cp;
340 
341   /* memory space required by dai-fletcher */
342   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
343   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n);CHKERRQ(ierr);
344   ierr = PetscFree(df->f);CHKERRQ(ierr);
345   df->f = tmp;
346 
347   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
348   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n);CHKERRQ(ierr);
349   ierr = PetscFree(df->a);CHKERRQ(ierr);
350   df->a = tmp;
351 
352   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
353   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n);CHKERRQ(ierr);
354   ierr = PetscFree(df->l);CHKERRQ(ierr);
355   df->l = tmp;
356 
357   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
358   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n);CHKERRQ(ierr);
359   ierr = PetscFree(df->u);CHKERRQ(ierr);
360   df->u = tmp;
361 
362   ierr = PetscMalloc1(n, &tmp);CHKERRQ(ierr);
363   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n);CHKERRQ(ierr);
364   ierr = PetscFree(df->x);CHKERRQ(ierr);
365   df->x = tmp;
366 
367   ierr = PetscMalloc1(n, &tmp_Q);CHKERRQ(ierr);
368   for (i = 0; i < n; i ++) {
369     ierr = PetscMalloc1(n, &tmp_Q[i]);CHKERRQ(ierr);
370     if (i < old_n) {
371       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n);CHKERRQ(ierr);
372       ierr = PetscFree(df->Q[i]);CHKERRQ(ierr);
373     }
374   }
375 
376   ierr = PetscFree(df->Q);CHKERRQ(ierr);
377   df->Q = tmp_Q;
378 
379   ierr = PetscFree(df->g);CHKERRQ(ierr);
380   ierr = PetscMalloc1(n, &df->g);CHKERRQ(ierr);
381 
382   ierr = PetscFree(df->y);CHKERRQ(ierr);
383   ierr = PetscMalloc1(n, &df->y);CHKERRQ(ierr);
384 
385   ierr = PetscFree(df->tempv);CHKERRQ(ierr);
386   ierr = PetscMalloc1(n, &df->tempv);CHKERRQ(ierr);
387 
388   ierr = PetscFree(df->d);CHKERRQ(ierr);
389   ierr = PetscMalloc1(n, &df->d);CHKERRQ(ierr);
390 
391   ierr = PetscFree(df->Qd);CHKERRQ(ierr);
392   ierr = PetscMalloc1(n, &df->Qd);CHKERRQ(ierr);
393 
394   ierr = PetscFree(df->t);CHKERRQ(ierr);
395   ierr = PetscMalloc1(n, &df->t);CHKERRQ(ierr);
396 
397   ierr = PetscFree(df->xplus);CHKERRQ(ierr);
398   ierr = PetscMalloc1(n, &df->xplus);CHKERRQ(ierr);
399 
400   ierr = PetscFree(df->tplus);CHKERRQ(ierr);
401   ierr = PetscMalloc1(n, &df->tplus);CHKERRQ(ierr);
402 
403   ierr = PetscFree(df->sk);CHKERRQ(ierr);
404   ierr = PetscMalloc1(n, &df->sk);CHKERRQ(ierr);
405 
406   ierr = PetscFree(df->yk);CHKERRQ(ierr);
407   ierr = PetscMalloc1(n, &df->yk);CHKERRQ(ierr);
408 
409   ierr = PetscFree(df->ipt);CHKERRQ(ierr);
410   ierr = PetscMalloc1(n, &df->ipt);CHKERRQ(ierr);
411 
412   ierr = PetscFree(df->ipt2);CHKERRQ(ierr);
413   ierr = PetscMalloc1(n, &df->ipt2);CHKERRQ(ierr);
414 
415   ierr = PetscFree(df->uv);CHKERRQ(ierr);
416   ierr = PetscMalloc1(n, &df->uv);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 PetscErrorCode destroy_df_solver(TAO_DF *df)
421 {
422   PetscErrorCode ierr;
423   PetscInt       i;
424 
425   PetscFunctionBegin;
426   ierr = PetscFree(df->f);CHKERRQ(ierr);
427   ierr = PetscFree(df->a);CHKERRQ(ierr);
428   ierr = PetscFree(df->l);CHKERRQ(ierr);
429   ierr = PetscFree(df->u);CHKERRQ(ierr);
430   ierr = PetscFree(df->x);CHKERRQ(ierr);
431 
432   for (i = 0; i < df->cur_num_cp; i ++) {
433     ierr = PetscFree(df->Q[i]);CHKERRQ(ierr);
434   }
435   ierr = PetscFree(df->Q);CHKERRQ(ierr);
436   ierr = PetscFree(df->ipt);CHKERRQ(ierr);
437   ierr = PetscFree(df->ipt2);CHKERRQ(ierr);
438   ierr = PetscFree(df->uv);CHKERRQ(ierr);
439   ierr = PetscFree(df->g);CHKERRQ(ierr);
440   ierr = PetscFree(df->y);CHKERRQ(ierr);
441   ierr = PetscFree(df->tempv);CHKERRQ(ierr);
442   ierr = PetscFree(df->d);CHKERRQ(ierr);
443   ierr = PetscFree(df->Qd);CHKERRQ(ierr);
444   ierr = PetscFree(df->t);CHKERRQ(ierr);
445   ierr = PetscFree(df->xplus);CHKERRQ(ierr);
446   ierr = PetscFree(df->tplus);CHKERRQ(ierr);
447   ierr = PetscFree(df->sk);CHKERRQ(ierr);
448   ierr = PetscFree(df->yk);CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
453 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
454 {
455   PetscReal r = 0.0;
456   PetscInt  i;
457 
458   for (i = 0; i < n; i++){
459     x[i] = -c[i] + lambda*a[i];
460     if (x[i] > u[i])     x[i] = u[i];
461     else if(x[i] < l[i]) x[i] = l[i];
462     r += a[i]*x[i];
463   }
464   return r - b;
465 }
466 
467 /** Modified Dai-Fletcher QP projector solves the problem:
468  *
469  *      minimise  0.5*x'*x - c'*x
470  *      subj to   a'*x = b
471  *                l \leq x \leq u
472  *
473  *  \param c The point to be projected onto feasible set
474  */
475 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
476 {
477   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
478   PetscReal      r, rl, ru, s;
479   PetscInt       innerIter;
480   PetscBool      nonNegativeSlack = PETSC_FALSE;
481   PetscErrorCode ierr;
482 
483   *lam_ext = 0;
484   lambda  = 0;
485   dlambda = 0.5;
486   innerIter = 1;
487 
488   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
489    *
490    *  Optimality conditions for \phi:
491    *
492    *  1. lambda   <= 0
493    *  2. r        <= 0
494    *  3. r*lambda == 0
495    */
496 
497   /* Bracketing Phase */
498   r = phi(x, n, lambda, a, b, c, l, u);
499 
500   if(nonNegativeSlack) {
501     /* inequality constraint, i.e., with \xi >= 0 constraint */
502     if (r < TOL_R) return 0;
503   } else  {
504     /* equality constraint ,i.e., without \xi >= 0 constraint */
505     if (PetscAbsReal(r) < TOL_R) return 0;
506   }
507 
508   if (r < 0.0){
509     lambdal = lambda;
510     rl      = r;
511     lambda  = lambda + dlambda;
512     r       = phi(x, n, lambda, a, b, c, l, u);
513     while (r < 0.0 && dlambda < BMRM_INFTY)  {
514       lambdal = lambda;
515       s       = rl/r - 1.0;
516       if (s < 0.1) s = 0.1;
517       dlambda = dlambda + dlambda/s;
518       lambda  = lambda + dlambda;
519       rl      = r;
520       r       = phi(x, n, lambda, a, b, c, l, u);
521     }
522     lambdau = lambda;
523     ru      = r;
524   } else {
525     lambdau = lambda;
526     ru      = r;
527     lambda  = lambda - dlambda;
528     r       = phi(x, n, lambda, a, b, c, l, u);
529     while (r > 0.0 && dlambda > -BMRM_INFTY) {
530       lambdau = lambda;
531       s       = ru/r - 1.0;
532       if (s < 0.1) s = 0.1;
533       dlambda = dlambda + dlambda/s;
534       lambda  = lambda - dlambda;
535       ru      = r;
536       r       = phi(x, n, lambda, a, b, c, l, u);
537     }
538     lambdal = lambda;
539     rl      = r;
540   }
541 
542   if(PetscAbsReal(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
543 
544   if(ru == 0){
545     return innerIter;
546   }
547 
548   /* Secant Phase */
549   s       = 1.0 - rl/ru;
550   dlambda = dlambda/s;
551   lambda  = lambdau - dlambda;
552   r       = phi(x, n, lambda, a, b, c, l, u);
553 
554   while (PetscAbsReal(r) > TOL_R
555          && dlambda > TOL_LAM * (1.0 + PetscAbsReal(lambda))
556          && innerIter < df->maxProjIter){
557     innerIter++;
558     if (r > 0.0){
559       if (s <= 2.0){
560         lambdau = lambda;
561         ru      = r;
562         s       = 1.0 - rl/ru;
563         dlambda = (lambdau - lambdal) / s;
564         lambda  = lambdau - dlambda;
565       } else {
566         s          = ru/r-1.0;
567         if (s < 0.1) s = 0.1;
568         dlambda    = (lambdau - lambda) / s;
569         lambda_new = 0.75*lambdal + 0.25*lambda;
570         if (lambda_new < (lambda - dlambda))
571           lambda_new = lambda - dlambda;
572         lambdau    = lambda;
573         ru         = r;
574         lambda     = lambda_new;
575         s          = (lambdau - lambdal) / (lambdau - lambda);
576       }
577     } else {
578       if (s >= 2.0){
579         lambdal = lambda;
580         rl      = r;
581         s       = 1.0 - rl/ru;
582         dlambda = (lambdau - lambdal) / s;
583         lambda  = lambdau - dlambda;
584       } else {
585         s          = rl/r - 1.0;
586         if (s < 0.1) s = 0.1;
587         dlambda    = (lambda-lambdal) / s;
588         lambda_new = 0.75*lambdau + 0.25*lambda;
589         if (lambda_new > (lambda + dlambda))
590           lambda_new = lambda + dlambda;
591         lambdal    = lambda;
592         rl         = r;
593         lambda     = lambda_new;
594         s          = (lambdau - lambdal) / (lambdau-lambda);
595       }
596     }
597     r = phi(x, n, lambda, a, b, c, l, u);
598   }
599 
600   *lam_ext = lambda;
601   if(innerIter >= df->maxProjIter) {
602     ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
603   }
604   return innerIter;
605 }
606 
607 
608 PetscErrorCode solve(TAO_DF *df)
609 {
610   PetscErrorCode ierr;
611   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
612   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
613   PetscReal      DELTAsv, ProdDELTAsv;
614   PetscReal      c, *tempQ;
615   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
616   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
617   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
618   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
619   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
620 
621   /*** variables for the adaptive nonmonotone linesearch ***/
622   PetscInt    L, llast;
623   PetscReal   fr, fbest, fv, fc, fv0;
624 
625   c = BMRM_INFTY;
626 
627   DELTAsv = EPS_SV;
628   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
629   else  ProdDELTAsv = EPS_SV;
630 
631   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
632 
633   lam_ext = 0.0;
634 
635   /* Project the initial solution */
636   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
637 
638   /* Compute gradient
639      g = Q*x + f; */
640 
641   it = 0;
642   for (i = 0; i < dim; i++) {
643     if (PetscAbsReal(x[i]) > ProdDELTAsv) ipt[it++] = i;
644   }
645 
646   ierr = PetscMemzero(t, dim*sizeof(PetscReal));CHKERRQ(ierr);
647   for (i = 0; i < it; i++){
648     tempQ = Q[ipt[i]];
649     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
650   }
651   for (i = 0; i < dim; i++){
652     g[i] = t[i] + f[i];
653   }
654 
655 
656   /* y = -(x_{k} - g_{k}) */
657   for (i = 0; i < dim; i++){
658     y[i] = g[i] - x[i];
659   }
660 
661   /* Project x_{k} - g_{k} */
662   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
663 
664   /* y = P(x_{k} - g_{k}) - x_{k} */
665   max = ALPHA_MIN;
666   for (i = 0; i < dim; i++){
667     y[i] = tempv[i] - x[i];
668     if (PetscAbsReal(y[i]) > max) max = PetscAbsReal(y[i]);
669   }
670 
671   if (max < tol*1e-3){
672     return 0;
673   }
674 
675   alpha = 1.0 / max;
676 
677   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
678   fv0   = 0.0;
679   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
680 
681   /*** adaptive nonmonotone linesearch ***/
682   L     = 2;
683   fr    = ALPHA_MAX;
684   fbest = fv0;
685   fc    = fv0;
686   llast = 0;
687   akold = bkold = 0.0;
688 
689   /***      Iterator begins     ***/
690   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
691 
692     /* tempv = -(x_{k} - alpha*g_{k}) */
693     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
694 
695     /* Project x_{k} - alpha*g_{k} */
696     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
697 
698 
699     /* gd = \inner{d_{k}}{g_{k}}
700         d = P(x_{k} - alpha*g_{k}) - x_{k}
701     */
702     gd = 0.0;
703     for (i = 0; i < dim; i++){
704       d[i] = y[i] - x[i];
705       gd  += d[i] * g[i];
706     }
707 
708     /* Gradient computation  */
709 
710     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
711 
712     it = it2 = 0;
713     for (i = 0; i < dim; i++){
714       if (PetscAbsReal(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
715     }
716     for (i = 0; i < dim; i++) {
717       if (PetscAbsReal(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
718     }
719 
720     ierr = PetscMemzero(Qd, dim*sizeof(PetscReal));CHKERRQ(ierr);
721     /* compute Qd = Q*d */
722     if (it < it2){
723       for (i = 0; i < it; i++){
724         tempQ = Q[ipt[i]];
725         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
726       }
727     } else { /* compute Qd = Q*y-t */
728       for (i = 0; i < it2; i++){
729         tempQ = Q[ipt2[i]];
730         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
731       }
732       for (j = 0; j < dim; j++) Qd[j] -= t[j];
733     }
734 
735     /* ak = inner{d_{k}}{d_{k}} */
736     ak = 0.0;
737     for (i = 0; i < dim; i++) ak += d[i] * d[i];
738 
739     bk = 0.0;
740     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
741 
742     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
743     else lamnew = 1.0;
744 
745     /* fv is computing f(x_{k} + d_{k}) */
746     fv = 0.0;
747     for (i = 0; i < dim; i++){
748       xplus[i] = x[i] + d[i];
749       tplus[i] = t[i] + Qd[i];
750       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
751     }
752 
753     /* fr is fref */
754     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
755       lscount++;
756       fv = 0.0;
757       for (i = 0; i < dim; i++){
758         xplus[i] = x[i] + lamnew*d[i];
759         tplus[i] = t[i] + lamnew*Qd[i];
760         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
761       }
762     }
763 
764     for (i = 0; i < dim; i++){
765       sk[i] = xplus[i] - x[i];
766       yk[i] = tplus[i] - t[i];
767       x[i]  = xplus[i];
768       t[i]  = tplus[i];
769       g[i]  = t[i] + f[i];
770     }
771 
772     /* update the line search control parameters */
773     if (fv < fbest){
774       fbest = fv;
775       fc    = fv;
776       llast = 0;
777     } else {
778       fc = (fc > fv ? fc : fv);
779       llast++;
780       if (llast == L){
781         fr    = fc;
782         fc    = fv;
783         llast = 0;
784       }
785     }
786 
787     ak = bk = 0.0;
788     for (i = 0; i < dim; i++){
789       ak += sk[i] * sk[i];
790       bk += sk[i] * yk[i];
791     }
792 
793     if (bk <= EPS*ak) alpha = ALPHA_MAX;
794     else {
795       if (bkold < EPS*akold) alpha = ak/bk;
796       else alpha = (akold+ak)/(bkold+bk);
797 
798       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
799       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
800     }
801 
802     akold = ak;
803     bkold = bk;
804 
805     /*** stopping criterion based on KKT conditions ***/
806     /* at optimal, gradient of lagrangian w.r.t. x is zero */
807 
808     bk = 0.0;
809     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
810 
811     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
812       it     = 0;
813       luv    = 0;
814       kktlam = 0.0;
815       for (i = 0; i < dim; i++){
816         /* x[i] is active hence lagrange multipliers for box constraints
817                 are zero. The lagrange multiplier for ineq. const. is then
818                 defined as below
819         */
820         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
821           ipt[it++] = i;
822           kktlam    = kktlam - a[i]*g[i];
823         } else  uv[luv++] = i;
824       }
825 
826       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
827       else {
828         kktlam = kktlam/it;
829         info   = 1;
830         for (i = 0; i < it; i++) {
831           if (PetscAbsReal(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
832             info = 0;
833             break;
834           }
835         }
836         if (info == 1)  {
837           for (i = 0; i < luv; i++)  {
838             if (x[uv[i]] <= DELTAsv){
839               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
840                      not be zero. So, the gradient without beta is > 0
841               */
842               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
843                 info = 0;
844                 break;
845               }
846             } else {
847               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
848                      not be zero. So, the gradient without eta is < 0
849               */
850               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
851                 info = 0;
852                 break;
853               }
854             }
855           }
856         }
857 
858         if (info == 1) return 0;
859       }
860     }
861   }
862   return 0;
863 }
864 
865 
866