xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision 5b0d99aef51158554e42172a250c96a448fd1e2b)
1 #include "bmrm.h"
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
18 */
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "make_grad_node"
22 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = PetscNew(p);CHKERRQ(ierr);
28   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
29   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
30   (*p)->next = NULL;
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "destroy_grad_list"
36 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
37 {
38   PetscErrorCode ierr;
39   Vec_Chain      *p = head->next, *q;
40 
41   PetscFunctionBegin;
42   while(p) {
43     q = p->next;
44     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
45     ierr = PetscFree(p);CHKERRQ(ierr);
46     p = q;
47   }
48   head->next = NULL;
49   PetscFunctionReturn(0);
50 }
51 
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "TaoSolve_BMRM"
55 static PetscErrorCode TaoSolve_BMRM(TaoSolver tao)
56 {
57   PetscErrorCode             ierr;
58   TaoSolverTerminationReason reason;
59   TAO_DF                     df;
60   TAO_BMRM                   *bmrm = (TAO_BMRM*)tao->data;
61 
62   /* Values and pointers to parts of the optimization problem */
63   PetscReal                  f = 0.0;
64   Vec                        W = tao->solution;
65   Vec                        G = tao->gradient;
66   PetscReal                  lambda;
67   PetscReal                  bt;
68   Vec_Chain                  grad_list, *tail_glist, *pgrad;
69   PetscInt                   iter = 0;
70   PetscInt                   i;
71   PetscMPIInt                rank;
72 
73   /* Used in termination criteria check */
74   PetscReal                  reg;
75   PetscReal                  jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
76   PetscReal                  innerSolverTol;
77 
78   PetscFunctionBegin;
79   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
80   lambda = bmrm->lambda;
81 
82   /* Check Stopping Condition */
83   tao->step = 1.0;
84   max_jtwt = -BMRM_INFTY;
85   min_jw = BMRM_INFTY;
86   innerSolverTol = 1.0;
87   epsilon = 0.0;
88 
89   if (!rank) {
90     ierr = init_df_solver(&df);CHKERRQ(ierr);
91     grad_list.next = NULL;
92     tail_glist = &grad_list;
93   }
94 
95   df.tol = 1e-6;
96   reason = TAO_CONTINUE_ITERATING;
97 
98   /*-----------------Algorithm Begins------------------------*/
99   /* make the scatter */
100   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
101   ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
102   ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);
103 
104   /* NOTE: In application pass the sub-gradient of Remp(W) */
105   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
106   ierr = TaoMonitor(tao,iter,f,1.0,0.0,tao->step,&reason);CHKERRQ(ierr);
107   while (reason == TAO_CONTINUE_ITERATING) {
108     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
109     ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
110     bt = f - bt;
111 
112     /* First gather the gradient to the master node */
113     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
114     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
115 
116     /* Bring up the inner solver */
117     if (!rank) {
118       ierr = ensure_df_space(iter+1, &df); CHKERRQ(ierr);
119       ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
120       tail_glist->next = pgrad;
121       tail_glist = pgrad;
122 
123       df.a[iter] = 1.0;
124       df.f[iter] = -bt;
125       df.u[iter] = 1.0;
126       df.l[iter] = 0.0;
127 
128       /* set up the Q */
129       pgrad = grad_list.next;
130       for (i=0; i<=iter; i++) {
131         ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
132         df.Q[i][iter] = df.Q[iter][i] = reg / lambda;
133         pgrad = pgrad->next;
134       }
135 
136       if (iter > 0) {
137         df.x[iter] = 0.0;
138         ierr = solve(&df); CHKERRQ(ierr);
139       } else
140         df.x[0] = 1.0;
141 
142       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
143       jtwt = 0.0;
144       ierr = VecSet(bmrm->local_w, 0.0); CHKERRQ(ierr);
145       pgrad = grad_list.next;
146       for (i=0; i<=iter; i++) {
147         jtwt -= df.x[i] * df.f[i];
148         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V); CHKERRQ(ierr);
149         pgrad = pgrad->next;
150       }
151 
152       ierr = VecNorm(bmrm->local_w, NORM_2, &reg); CHKERRQ(ierr);
153       reg = 0.5*lambda*reg*reg;
154       jtwt -= reg;
155     } /* end if rank == 0 */
156 
157     /* scatter the new W to all nodes */
158     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
159     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
160 
161     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
162 
163     MPI_Bcast(&jtwt,1,MPI_DOUBLE,0,PETSC_COMM_WORLD);
164     MPI_Bcast(&reg,1,MPI_DOUBLE,0,PETSC_COMM_WORLD);
165 
166     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
167     if (jw < min_jw) min_jw = jw;
168     if (jtwt > max_jtwt) max_jtwt = jtwt;
169 
170     pre_epsilon = epsilon;
171     epsilon = min_jw - jtwt;
172 
173     if (!rank) {
174       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
175       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
176 
177       /* if the annealing doesn't work well, lower the inner solver tolerance */
178       if(pre_epsilon < epsilon) innerSolverTol *= 0.2;
179 
180       df.tol = innerSolverTol*0.5;
181     }
182 
183     iter++;
184     ierr = TaoMonitor(tao,iter,min_jw,epsilon,0.0,tao->step,&reason);CHKERRQ(ierr);
185   }
186 
187   /* free all the memory */
188   if (!rank) {
189     ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
190     ierr = destroy_df_solver(&df);CHKERRQ(ierr);
191   }
192 
193   ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
194   ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
195   PetscFunctionReturn(0);
196 }
197 
198 
199 /* ---------------------------------------------------------- */
200 
201 #undef __FUNCT__
202 #define __FUNCT__ "TaoSetup_BMRM"
203 static PetscErrorCode TaoSetup_BMRM(TaoSolver tao)
204 {
205 
206   PetscErrorCode ierr;
207 
208   PetscFunctionBegin;
209   /* Allocate some arrays */
210   if (!tao->gradient) {
211     ierr = VecDuplicate(tao->solution, &tao->gradient);   CHKERRQ(ierr);
212   }
213   PetscFunctionReturn(0);
214 }
215 
216 /*------------------------------------------------------------*/
217 #undef __FUNCT__
218 #define __FUNCT__ "TaoDestroy_BMRM"
219 static PetscErrorCode TaoDestroy_BMRM(TaoSolver tao)
220 {
221   PetscErrorCode ierr;
222 
223   PetscFunctionBegin;
224   ierr = PetscFree(tao->data);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 #undef __FUNCT__
229 #define __FUNCT__ "TaoSetFromOptions_BMRM"
230 static PetscErrorCode TaoSetFromOptions_BMRM(TaoSolver tao)
231 {
232   PetscErrorCode ierr;
233   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
234   PetscBool      flg;
235 
236   PetscFunctionBegin;
237   ierr = PetscOptionsHead("BMRM for regularized risk minimization");CHKERRQ(ierr);
238   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,&flg); CHKERRQ(ierr);
239   ierr = PetscOptionsTail();CHKERRQ(ierr);
240   PetscFunctionReturn(0);
241 }
242 
243 /*------------------------------------------------------------*/
244 #undef __FUNCT__
245 #define __FUNCT__ "TaoView_BMRM"
246 static PetscErrorCode TaoView_BMRM(TaoSolver tao, PetscViewer viewer)
247 {
248   PetscBool      isascii;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
253   if (isascii) {
254     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
255     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 /*------------------------------------------------------------*/
261 EXTERN_C_BEGIN
262 #undef __FUNCT__
263 #define __FUNCT__ "TaoCreate_BMRM"
264 PetscErrorCode TaoCreate_BMRM(TaoSolver tao)
265 {
266   TAO_BMRM       *bmrm;
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   tao->ops->setup = TaoSetup_BMRM;
271   tao->ops->solve = TaoSolve_BMRM;
272   tao->ops->view  = TaoView_BMRM;
273   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
274   tao->ops->destroy = TaoDestroy_BMRM;
275 
276   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
277   bmrm->lambda = 1.0;
278   tao->data = (void*)bmrm;
279 
280   /* Note: May need to be tuned! */
281   tao->max_it = 2048;
282   tao->max_funcs = 300000;
283   tao->fatol = 1e-20;
284   tao->frtol = 1e-25;
285   tao->gatol = 1e-25;
286   tao->grtol = 1e-25;
287 
288   PetscFunctionReturn(0);
289 }
290 EXTERN_C_END
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "init_df_solver"
294 PetscErrorCode init_df_solver(TAO_DF *df)
295 {
296   PetscInt       i, n = INCRE_DIM;
297   PetscErrorCode ierr;
298 
299   PetscFunctionBegin;
300   /* default values */
301   df->maxProjIter = 200;
302   df->maxPGMIter = 300000;
303   df->b = 1.0;
304 
305   /* memory space required by Dai-Fletcher */
306   df->cur_num_cp = n;
307   ierr = PetscMalloc1(n, &df->f); CHKERRQ(ierr);
308   ierr = PetscMalloc1(n, &df->a); CHKERRQ(ierr);
309   ierr = PetscMalloc1(n, &df->l); CHKERRQ(ierr);
310   ierr = PetscMalloc1(n, &df->u); CHKERRQ(ierr);
311   ierr = PetscMalloc1(n, &df->x); CHKERRQ(ierr);
312   ierr = PetscMalloc1(n, &df->Q); CHKERRQ(ierr);
313 
314   for (i = 0; i < n; i ++) {
315     ierr = PetscMalloc1(n, &df->Q[i]); CHKERRQ(ierr);
316   }
317 
318   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
319   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
320   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
321   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
322   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
323   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
324   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
325   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
326   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
327   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
328 
329   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
330   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
331   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
332   PetscFunctionReturn(0);
333 }
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "ensure_df_space"
337 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
338 {
339   PetscErrorCode ierr;
340   PetscReal      *tmp, **tmp_Q;
341   PetscInt       i, n, old_n;
342 
343   PetscFunctionBegin;
344   df->dim = dim;
345   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
346 
347   old_n = df->cur_num_cp;
348   df->cur_num_cp += INCRE_DIM;
349   n = df->cur_num_cp;
350 
351   /* memory space required by dai-fletcher */
352   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
353   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
354   ierr = PetscFree(df->f); CHKERRQ(ierr);
355   df->f = tmp;
356 
357   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
358   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
359   ierr = PetscFree(df->a); CHKERRQ(ierr);
360   df->a = tmp;
361 
362   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
363   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
364   ierr = PetscFree(df->l); CHKERRQ(ierr);
365   df->l = tmp;
366 
367   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
368   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
369   ierr = PetscFree(df->u); CHKERRQ(ierr);
370   df->u = tmp;
371 
372   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
373   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
374   ierr = PetscFree(df->x); CHKERRQ(ierr);
375   df->x = tmp;
376 
377   ierr = PetscMalloc1(n, &tmp_Q); CHKERRQ(ierr);
378   for (i = 0; i < n; i ++) {
379     ierr = PetscMalloc1(n, &tmp_Q[i]); CHKERRQ(ierr);
380     if (i < old_n) {
381       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n); CHKERRQ(ierr);
382       ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
383     }
384   }
385 
386   ierr = PetscFree(df->Q); CHKERRQ(ierr);
387   df->Q = tmp_Q;
388 
389   ierr = PetscFree(df->g); CHKERRQ(ierr);
390   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
391 
392   ierr = PetscFree(df->y); CHKERRQ(ierr);
393   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
394 
395   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
396   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
397 
398   ierr = PetscFree(df->d); CHKERRQ(ierr);
399   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
400 
401   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
402   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
403 
404   ierr = PetscFree(df->t); CHKERRQ(ierr);
405   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
406 
407   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
408   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
409 
410   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
411   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
412 
413   ierr = PetscFree(df->sk); CHKERRQ(ierr);
414   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
415 
416   ierr = PetscFree(df->yk); CHKERRQ(ierr);
417   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
418 
419   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
420   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
421 
422   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
423   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
424 
425   ierr = PetscFree(df->uv); CHKERRQ(ierr);
426   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
427   PetscFunctionReturn(0);
428 }
429 
430 #undef __FUNCT__
431 #define __FUNCT__ "destroy_df_solver"
432 PetscErrorCode destroy_df_solver(TAO_DF *df)
433 {
434   PetscErrorCode ierr;
435   PetscInt       i;
436 
437   PetscFunctionBegin;
438   ierr = PetscFree(df->f); CHKERRQ(ierr);
439   ierr = PetscFree(df->a); CHKERRQ(ierr);
440   ierr = PetscFree(df->l); CHKERRQ(ierr);
441   ierr = PetscFree(df->u); CHKERRQ(ierr);
442   ierr = PetscFree(df->x); CHKERRQ(ierr);
443 
444   for (i = 0; i < df->cur_num_cp; i ++) {
445     ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
446   }
447   ierr = PetscFree(df->Q); CHKERRQ(ierr);
448   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
449   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
450   ierr = PetscFree(df->uv); CHKERRQ(ierr);
451   ierr = PetscFree(df->g); CHKERRQ(ierr);
452   ierr = PetscFree(df->y); CHKERRQ(ierr);
453   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
454   ierr = PetscFree(df->d); CHKERRQ(ierr);
455   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
456   ierr = PetscFree(df->t); CHKERRQ(ierr);
457   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
458   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
459   ierr = PetscFree(df->sk); CHKERRQ(ierr);
460   ierr = PetscFree(df->yk); CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
465 #undef __FUNCT__
466 #define __FUNCT__ "phi"
467 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
468 {
469   PetscReal r = 0.0;
470   PetscInt  i;
471 
472   for (i = 0; i < n; i++){
473     x[i] = -c[i] + lambda*a[i];
474     if (x[i] > u[i])     x[i] = u[i];
475     else if(x[i] < l[i]) x[i] = l[i];
476     r += a[i]*x[i];
477   }
478   return r - b;
479 }
480 
481 /** Modified Dai-Fletcher QP projector solves the problem:
482  *
483  *      minimise  0.5*x'*x - c'*x
484  *      subj to   a'*x = b
485  *                l \leq x \leq u
486  *
487  *  \param c The point to be projected onto feasible set
488  */
489 #undef __FUNCT__
490 #define __FUNCT__ "project"
491 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
492 {
493   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
494   PetscReal      r, rl, ru, s;
495   PetscInt       innerIter;
496   PetscBool      nonNegativeSlack = PETSC_FALSE;
497   PetscErrorCode ierr;
498 
499   *lam_ext = 0;
500   lambda  = 0;
501   dlambda = 0.5;
502   innerIter = 1;
503 
504   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
505    *
506    *  Optimality conditions for \phi:
507    *
508    *  1. lambda   <= 0
509    *  2. r        <= 0
510    *  3. r*lambda == 0
511    */
512 
513   /* Bracketing Phase */
514   r = phi(x, n, lambda, a, b, c, l, u);
515 
516   if(nonNegativeSlack) {
517     /* inequality constraint, i.e., with \xi >= 0 constraint */
518     if (r < TOL_R) return 0;
519   } else  {
520     /* equality constraint ,i.e., without \xi >= 0 constraint */
521     if (fabs(r) < TOL_R) return 0;
522   }
523 
524   if (r < 0.0){
525     lambdal = lambda;
526     rl      = r;
527     lambda  = lambda + dlambda;
528     r       = phi(x, n, lambda, a, b, c, l, u);
529     while (r < 0.0 && dlambda < BMRM_INFTY)  {
530       lambdal = lambda;
531       s       = rl/r - 1.0;
532       if (s < 0.1) s = 0.1;
533       dlambda = dlambda + dlambda/s;
534       lambda  = lambda + dlambda;
535       rl      = r;
536       r       = phi(x, n, lambda, a, b, c, l, u);
537     }
538     lambdau = lambda;
539     ru      = r;
540   } else {
541     lambdau = lambda;
542     ru      = r;
543     lambda  = lambda - dlambda;
544     r       = phi(x, n, lambda, a, b, c, l, u);
545     while (r > 0.0 && dlambda > -BMRM_INFTY) {
546       lambdau = lambda;
547       s       = ru/r - 1.0;
548       if (s < 0.1) s = 0.1;
549       dlambda = dlambda + dlambda/s;
550       lambda  = lambda - dlambda;
551       ru      = r;
552       r       = phi(x, n, lambda, a, b, c, l, u);
553     }
554     lambdal = lambda;
555     rl      = r;
556   }
557 
558   if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
559 
560   if(ru == 0){
561     lambda = lambdau;
562     r = phi(x, n, lambda, a, b, c, l, u);
563     return innerIter;
564   }
565 
566   /* Secant Phase */
567   s       = 1.0 - rl/ru;
568   dlambda = dlambda/s;
569   lambda  = lambdau - dlambda;
570   r       = phi(x, n, lambda, a, b, c, l, u);
571 
572   while (fabs(r) > TOL_R
573          && dlambda > TOL_LAM * (1.0 + fabs(lambda))
574          && innerIter < df->maxProjIter){
575     innerIter++;
576     if (r > 0.0){
577       if (s <= 2.0){
578         lambdau = lambda;
579         ru      = r;
580         s       = 1.0 - rl/ru;
581         dlambda = (lambdau - lambdal) / s;
582         lambda  = lambdau - dlambda;
583       } else {
584         s          = ru/r-1.0;
585         if (s < 0.1) s = 0.1;
586         dlambda    = (lambdau - lambda) / s;
587         lambda_new = 0.75*lambdal + 0.25*lambda;
588         if (lambda_new < (lambda - dlambda))
589           lambda_new = lambda - dlambda;
590         lambdau    = lambda;
591         ru         = r;
592         lambda     = lambda_new;
593         s          = (lambdau - lambdal) / (lambdau - lambda);
594       }
595     } else {
596       if (s >= 2.0){
597         lambdal = lambda;
598         rl      = r;
599         s       = 1.0 - rl/ru;
600         dlambda = (lambdau - lambdal) / s;
601         lambda  = lambdau - dlambda;
602       } else {
603         s          = rl/r - 1.0;
604         if (s < 0.1) s = 0.1;
605         dlambda    = (lambda-lambdal) / s;
606         lambda_new = 0.75*lambdau + 0.25*lambda;
607         if (lambda_new > (lambda + dlambda))
608           lambda_new = lambda + dlambda;
609         lambdal    = lambda;
610         rl         = r;
611         lambda     = lambda_new;
612         s          = (lambdau - lambdal) / (lambdau-lambda);
613       }
614     }
615     r = phi(x, n, lambda, a, b, c, l, u);
616   }
617 
618   *lam_ext = lambda;
619   if(innerIter >= df->maxProjIter) {
620     ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
621   }
622   return innerIter;
623 }
624 
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "solve"
628 PetscErrorCode solve(TAO_DF *df)
629 {
630   PetscErrorCode ierr;
631   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
632   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
633   PetscReal      DELTAsv, ProdDELTAsv;
634   PetscReal      c, *tempQ;
635   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
636   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
637   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
638   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
639   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
640 
641   /*** variables for the adaptive nonmonotone linesearch ***/
642   PetscInt    L, llast;
643   PetscReal   fr, fbest, fv, fc, fv0;
644 
645   c = BMRM_INFTY;
646 
647   DELTAsv = EPS_SV;
648   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
649   else  ProdDELTAsv = EPS_SV;
650 
651   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
652 
653   lam_ext = 0.0;
654 
655   /* Project the initial solution */
656   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
657 
658   /* Compute gradient
659      g = Q*x + f; */
660 
661   it = 0;
662   for (i = 0; i < dim; i++) {
663     if (fabs(x[i]) > ProdDELTAsv) ipt[it++] = i;
664   }
665 
666   ierr = PetscMemzero(t, dim*sizeof(PetscReal)); CHKERRQ(ierr);
667   for (i = 0; i < it; i++){
668     tempQ = Q[ipt[i]];
669     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
670   }
671   for (i = 0; i < dim; i++){
672     g[i] = t[i] + f[i];
673   }
674 
675 
676   /* y = -(x_{k} - g_{k}) */
677   for (i = 0; i < dim; i++){
678     y[i] = g[i] - x[i];
679   }
680 
681   /* Project x_{k} - g_{k} */
682   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
683 
684   /* y = P(x_{k} - g_{k}) - x_{k} */
685   max = ALPHA_MIN;
686   for (i = 0; i < dim; i++){
687     y[i] = tempv[i] - x[i];
688     if (fabs(y[i]) > max) max = fabs(y[i]);
689   }
690 
691   if (max < tol*1e-3){
692     lscount = 0;
693     innerIter    = 0;
694     return 0;
695   }
696 
697   alpha = 1.0 / max;
698 
699   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
700   fv0   = 0.0;
701   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
702 
703   /*** adaptive nonmonotone linesearch ***/
704   L     = 2;
705   fr    = ALPHA_MAX;
706   fbest = fv0;
707   fc    = fv0;
708   llast = 0;
709   akold = bkold = 0.0;
710 
711   /***      Iterator begins     ***/
712   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
713 
714     /* tempv = -(x_{k} - alpha*g_{k}) */
715     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
716 
717     /* Project x_{k} - alpha*g_{k} */
718     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
719 
720 
721     /* gd = \inner{d_{k}}{g_{k}}
722         d = P(x_{k} - alpha*g_{k}) - x_{k}
723     */
724     gd = 0.0;
725     for (i = 0; i < dim; i++){
726       d[i] = y[i] - x[i];
727       gd  += d[i] * g[i];
728     }
729 
730     /* Gradient computation  */
731 
732     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
733 
734     it = it2 = 0;
735     for (i = 0; i < dim; i++){
736       if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
737     }
738     for (i = 0; i < dim; i++) {
739       if (fabs(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
740     }
741 
742     ierr = PetscMemzero(Qd, dim*sizeof(PetscReal)); CHKERRQ(ierr);
743     /* compute Qd = Q*d */
744     if (it < it2){
745       for (i = 0; i < it; i++){
746         tempQ = Q[ipt[i]];
747         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
748       }
749     } else { /* compute Qd = Q*y-t */
750       for (i = 0; i < it2; i++){
751         tempQ = Q[ipt2[i]];
752         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
753       }
754       for (j = 0; j < dim; j++) Qd[j] -= t[j];
755     }
756 
757     /* ak = inner{d_{k}}{d_{k}} */
758     ak = 0.0;
759     for (i = 0; i < dim; i++) ak += d[i] * d[i];
760 
761     bk = 0.0;
762     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
763 
764     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
765     else lamnew = 1.0;
766 
767     /* fv is computing f(x_{k} + d_{k}) */
768     fv = 0.0;
769     for (i = 0; i < dim; i++){
770       xplus[i] = x[i] + d[i];
771       tplus[i] = t[i] + Qd[i];
772       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
773     }
774 
775     /* fr is fref */
776     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
777       lscount++;
778       fv = 0.0;
779       for (i = 0; i < dim; i++){
780         xplus[i] = x[i] + lamnew*d[i];
781         tplus[i] = t[i] + lamnew*Qd[i];
782         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
783       }
784     }
785 
786     for (i = 0; i < dim; i++){
787       sk[i] = xplus[i] - x[i];
788       yk[i] = tplus[i] - t[i];
789       x[i]  = xplus[i];
790       t[i]  = tplus[i];
791       g[i]  = t[i] + f[i];
792     }
793 
794     /* update the line search control parameters */
795     if (fv < fbest){
796       fbest = fv;
797       fc    = fv;
798       llast = 0;
799     } else {
800       fc = (fc > fv ? fc : fv);
801       llast++;
802       if (llast == L){
803         fr    = fc;
804         fc    = fv;
805         llast = 0;
806       }
807     }
808 
809     ak = bk = 0.0;
810     for (i = 0; i < dim; i++){
811       ak += sk[i] * sk[i];
812       bk += sk[i] * yk[i];
813     }
814 
815     if (bk <= EPS*ak) alpha = ALPHA_MAX;
816     else {
817       if (bkold < EPS*akold) alpha = ak/bk;
818       else alpha = (akold+ak)/(bkold+bk);
819 
820       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
821       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
822     }
823 
824     akold = ak;
825     bkold = bk;
826 
827     /*** stopping criterion based on KKT conditions ***/
828     /* at optimal, gradient of lagrangian w.r.t. x is zero */
829 
830     bk = 0.0;
831     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
832 
833     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
834       it     = 0;
835       luv    = 0;
836       kktlam = 0.0;
837       for (i = 0; i < dim; i++){
838         /* x[i] is active hence lagrange multipliers for box constraints
839                 are zero. The lagrange multiplier for ineq. const. is then
840                 defined as below
841         */
842         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
843           ipt[it++] = i;
844           kktlam    = kktlam - a[i]*g[i];
845         } else  uv[luv++] = i;
846       }
847 
848       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
849       else {
850         kktlam = kktlam/it;
851         info   = 1;
852         for (i = 0; i < it; i++) {
853           if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
854             info = 0;
855             break;
856           }
857         }
858         if (info == 1)  {
859           for (i = 0; i < luv; i++)  {
860             if (x[uv[i]] <= DELTAsv){
861               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
862                      not be zero. So, the gradient without beta is > 0
863               */
864               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
865                 info = 0;
866                 break;
867               }
868             } else {
869               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
870                      not be zero. So, the gradient without eta is < 0
871               */
872               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
873                 info = 0;
874                 break;
875               }
876             }
877           }
878         }
879 
880         if (info == 1) return 0;
881       }
882     }
883   }
884   return 0;
885 }
886 
887 
888