xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision 85f9fe2e472e494f624e07331fb0f6081bef5d80)
1 #include "bmrm.h"
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,
7                      PetscReal,PetscReal*,PetscReal*,PetscReal*);
8 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,
9                         PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
10 
11 static PetscErrorCode solve(TAO_DF*);
12 
13 
14 /*------------------------------------------------------------*/
15 /* The main solver function
16 
17    f = Remp(W)          This is what the user provides us from the application layer
18    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
19 
20    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
21 */
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "make_grad_node"
25 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
26 {
27   PetscErrorCode ierr;
28 
29   PetscFunctionBegin;
30 
31   ierr = PetscMalloc(sizeof(Vec_Chain), p);CHKERRQ(ierr);
32   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
33   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
34   (*p)->next = NULL;
35 
36   PetscFunctionReturn(0);
37 }
38 
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "destroy_grad_list"
42 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
43 {
44   PetscErrorCode ierr;
45   Vec_Chain *p = head->next, *q;
46 
47   PetscFunctionBegin;
48   while(p) {
49     q = p->next;
50     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
51     ierr = PetscFree(p);CHKERRQ(ierr);
52     p = q;
53   }
54   head->next = NULL;
55 
56   PetscFunctionReturn(0);
57 }
58 
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "TaoSolve_BMRM"
62 static PetscErrorCode TaoSolve_BMRM(TaoSolver tao)
63 {
64   PetscErrorCode ierr;
65   TaoSolverTerminationReason reason;
66   TAO_DF df;
67   TAO_BMRM *bmrm = (TAO_BMRM*)tao->data;
68 
69   /* Values and pointers to parts of the optimization problem */
70   PetscReal f = 0.0;
71   Vec W = tao->solution;
72   Vec G = tao->gradient;
73   PetscReal lambda;
74 
75   PetscReal bt;
76   Vec_Chain grad_list, *tail_glist, *pgrad;
77 
78   PetscInt iter = 0;
79   PetscInt i;
80   PetscMPIInt rank;
81 
82   /* Used in termination criteria check */
83   PetscReal reg;
84   PetscReal jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
85   PetscReal innerSolverTol;
86 
87   PetscFunctionBegin;
88 
89   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
90   lambda = bmrm->lambda;
91 
92   /* Check Stopping Condition */
93   tao->step = 1.0;
94   max_jtwt = -BMRM_INFTY;
95   min_jw = BMRM_INFTY;
96   innerSolverTol = 1.0;
97   epsilon = 0.0;
98 
99   if (rank == 0) {
100     ierr = init_df_solver(&df);CHKERRQ(ierr);
101     grad_list.next = NULL;
102     tail_glist = &grad_list;
103   }
104 
105   df.tol = 1e-6;
106   reason = TAO_CONTINUE_ITERATING;
107 
108   /*-----------------Algorithm Begins------------------------*/
109   /* make the scatter */
110   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
111   ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
112   ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);
113 
114         /* NOTE: In application pass the sub-gradient of Remp(W) */
115   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
116   ierr = TaoMonitor(tao,iter,f,1.0,0.0,tao->step,&reason);CHKERRQ(ierr);
117   while (reason == TAO_CONTINUE_ITERATING) {
118     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
119     ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
120     bt = f - bt;
121 
122     /* First gather the gradient to the master node */
123     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
124     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
125 
126     /* Bring up the inner solver */
127     if (rank == 0) {
128       ierr = ensure_df_space(iter+1, &df); CHKERRQ(ierr);
129       ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
130       tail_glist->next = pgrad;
131       tail_glist = pgrad;
132 
133       df.a[iter] = 1.0;
134       df.f[iter] = -bt;
135       df.u[iter] = 1.0;
136       df.l[iter] = 0.0;
137 
138       /* set up the Q */
139       pgrad = grad_list.next;
140       for (i=0; i<=iter; i++) {
141         ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
142         df.Q[i][iter] = df.Q[iter][i] = reg / lambda;
143         pgrad = pgrad->next;
144       }
145 
146       if (iter > 0) {
147         df.x[iter] = 0.0;
148         ierr = solve(&df); CHKERRQ(ierr);
149       }
150       else
151         df.x[0] = 1.0;
152 
153       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
154       jtwt = 0.0;
155       ierr = VecSet(bmrm->local_w, 0.0); CHKERRQ(ierr);
156       pgrad = grad_list.next;
157       for (i=0; i<=iter; i++) {
158         jtwt -= df.x[i] * df.f[i];
159         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V); CHKERRQ(ierr);
160         pgrad = pgrad->next;
161       }
162 
163       ierr = VecNorm(bmrm->local_w, NORM_2, &reg); CHKERRQ(ierr);
164       reg = 0.5*lambda*reg*reg;
165       jtwt -= reg;
166     } /* end if rank == 0 */
167 
168     /* scatter the new W to all nodes */
169     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
170     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
171 
172     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
173 
174     MPI_Bcast(&jtwt,1,MPI_DOUBLE,0,PETSC_COMM_WORLD);
175     MPI_Bcast(&reg,1,MPI_DOUBLE,0,PETSC_COMM_WORLD);
176 
177     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
178     if (jw < min_jw)
179       min_jw = jw;
180     if (jtwt > max_jtwt)
181       max_jtwt = jtwt;
182 
183     pre_epsilon = epsilon;
184     epsilon = min_jw - jtwt;
185 
186     if (rank == 0) {
187       if (innerSolverTol > epsilon)
188         innerSolverTol = epsilon;
189       else if (innerSolverTol < 1e-7)
190         innerSolverTol = 1e-7;
191 
192       /* if the annealing doesn't work well, lower the inner solver tolerance */
193       if(pre_epsilon < epsilon)
194         innerSolverTol *= 0.2;
195 
196       df.tol = innerSolverTol*0.5;
197     }
198 
199     iter++;
200     ierr = TaoMonitor(tao,iter,min_jw,epsilon,0.0,tao->step,&reason);CHKERRQ(ierr);
201   }
202 
203   /* free all the memory */
204   if (rank == 0) {
205     ierr = destroy_grad_list(&grad_list);       CHKERRQ(ierr);
206     ierr = destroy_df_solver(&df);CHKERRQ(ierr);
207   }
208 
209   ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
210   ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
211 
212   PetscFunctionReturn(0);
213 }
214 
215 
216 /* ---------------------------------------------------------- */
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "TaoSetup_BMRM"
220 static PetscErrorCode TaoSetup_BMRM(TaoSolver tao) {
221 
222   PetscErrorCode      ierr;
223 
224   PetscFunctionBegin;
225 
226   /* Allocate some arrays */
227   if (!tao->gradient) {
228     ierr = VecDuplicate(tao->solution, &tao->gradient);   CHKERRQ(ierr);
229   }
230 
231   PetscFunctionReturn(0);
232 }
233 
234 
235 /*------------------------------------------------------------*/
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "TaoDestroy_BMRM"
239 static PetscErrorCode TaoDestroy_BMRM(TaoSolver tao)
240 {
241   PetscErrorCode ierr;
242 
243   PetscFunctionBegin;
244   ierr = PetscFree(tao->data);CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "TaoSetFromOptions_BMRM"
251 static PetscErrorCode TaoSetFromOptions_BMRM(TaoSolver tao)
252 {
253   PetscErrorCode ierr;
254   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
255   PetscBool      flg;
256 
257   PetscFunctionBegin;
258   ierr = PetscOptionsHead("BMRM for regularized risk minimization");CHKERRQ(ierr);
259   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,&flg); CHKERRQ(ierr);
260   ierr = PetscOptionsTail();CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 
265 /*------------------------------------------------------------*/
266 #undef __FUNCT__
267 #define __FUNCT__ "TaoView_BMRM"
268 static PetscErrorCode TaoView_BMRM(TaoSolver tao, PetscViewer viewer)
269 {
270   PetscBool           isascii;
271   PetscErrorCode      ierr;
272 
273   PetscFunctionBegin;
274   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
275   if (isascii) {
276     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
277     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 
283 /*------------------------------------------------------------*/
284 EXTERN_C_BEGIN
285 #undef __FUNCT__
286 #define __FUNCT__ "TaoCreate_BMRM"
287 PetscErrorCode TaoCreate_BMRM(TaoSolver tao)
288 {
289   TAO_BMRM       *bmrm;
290   PetscErrorCode ierr;
291 
292   PetscFunctionBegin;
293   tao->ops->setup = TaoSetup_BMRM;
294   tao->ops->solve = TaoSolve_BMRM;
295   tao->ops->view  = TaoView_BMRM;
296   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
297   tao->ops->destroy = TaoDestroy_BMRM;
298 
299   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
300   bmrm->lambda = 1.0;
301   tao->data = (void*)bmrm;
302 
303   /* Note: May need to be tuned! */
304   tao->max_it = 2048;
305   tao->max_funcs = 300000;
306   tao->fatol = 1e-20;
307   tao->frtol = 1e-25;
308   tao->gatol = 1e-25;
309   tao->grtol = 1e-25;
310 
311   PetscFunctionReturn(0);
312 }
313 EXTERN_C_END
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "init_df_solver"
317 PetscErrorCode init_df_solver(TAO_DF *df)
318 {
319   PetscInt       i, n = INCRE_DIM;
320   PetscErrorCode ierr;
321 
322   PetscFunctionBegin;
323   /* default values */
324   df->maxProjIter = 200;
325   df->maxPGMIter = 300000;
326   df->b = 1.0;
327 
328   /* memory space required by Dai-Fletcher */
329   df->cur_num_cp = n;
330   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->f); CHKERRQ(ierr);
331   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->a); CHKERRQ(ierr);
332   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->l); CHKERRQ(ierr);
333   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->u); CHKERRQ(ierr);
334   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->x); CHKERRQ(ierr);
335   ierr = PetscMalloc(sizeof(PetscReal*)*n, &df->Q); CHKERRQ(ierr);
336 
337   for (i = 0; i < n; i ++) {
338     ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Q[i]); CHKERRQ(ierr);
339   }
340 
341   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->g); CHKERRQ(ierr);
342   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->y); CHKERRQ(ierr);
343   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tempv); CHKERRQ(ierr);
344   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->d); CHKERRQ(ierr);
345   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Qd); CHKERRQ(ierr);
346   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->t); CHKERRQ(ierr);
347   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->xplus); CHKERRQ(ierr);
348   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tplus); CHKERRQ(ierr);
349   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->sk); CHKERRQ(ierr);
350   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->yk); CHKERRQ(ierr);
351 
352   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt); CHKERRQ(ierr);
353   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt2); CHKERRQ(ierr);
354   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->uv); CHKERRQ(ierr);
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNCT__
359 #define __FUNCT__ "ensure_df_space"
360 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
361 {
362   PetscErrorCode ierr;
363   PetscReal      *tmp, **tmp_Q;
364   PetscInt       i, n, old_n;
365 
366   PetscFunctionBegin;
367   df->dim = dim;
368   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
369 
370   old_n = df->cur_num_cp;
371   df->cur_num_cp += INCRE_DIM;
372   n = df->cur_num_cp;
373 
374   /* memory space required by dai-fletcher */
375   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp); CHKERRQ(ierr);
376   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
377   ierr = PetscFree(df->f); CHKERRQ(ierr);
378   df->f = tmp;
379 
380   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp); CHKERRQ(ierr);
381   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
382   ierr = PetscFree(df->a); CHKERRQ(ierr);
383   df->a = tmp;
384 
385   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp); CHKERRQ(ierr);
386   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
387   ierr = PetscFree(df->l); CHKERRQ(ierr);
388   df->l = tmp;
389 
390   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp); CHKERRQ(ierr);
391   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
392   ierr = PetscFree(df->u); CHKERRQ(ierr);
393   df->u = tmp;
394 
395   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp); CHKERRQ(ierr);
396   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
397   ierr = PetscFree(df->x); CHKERRQ(ierr);
398   df->x = tmp;
399 
400   ierr = PetscMalloc(sizeof(PetscReal*)*n, &tmp_Q); CHKERRQ(ierr);
401   for (i = 0; i < n; i ++) {
402     ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp_Q[i]); CHKERRQ(ierr);
403     if (i < old_n) {
404       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n); CHKERRQ(ierr);
405       ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
406     }
407   }
408 
409   ierr = PetscFree(df->Q); CHKERRQ(ierr);
410   df->Q = tmp_Q;
411 
412   ierr = PetscFree(df->g); CHKERRQ(ierr);
413   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->g); CHKERRQ(ierr);
414 
415   ierr = PetscFree(df->y); CHKERRQ(ierr);
416   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->y); CHKERRQ(ierr);
417 
418   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
419   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tempv); CHKERRQ(ierr);
420 
421   ierr = PetscFree(df->d); CHKERRQ(ierr);
422   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->d); CHKERRQ(ierr);
423 
424   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
425   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Qd); CHKERRQ(ierr);
426 
427   ierr = PetscFree(df->t); CHKERRQ(ierr);
428   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->t); CHKERRQ(ierr);
429 
430   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
431   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->xplus); CHKERRQ(ierr);
432 
433   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
434   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tplus); CHKERRQ(ierr);
435 
436   ierr = PetscFree(df->sk); CHKERRQ(ierr);
437   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->sk); CHKERRQ(ierr);
438 
439   ierr = PetscFree(df->yk); CHKERRQ(ierr);
440   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->yk); CHKERRQ(ierr);
441 
442   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
443   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt); CHKERRQ(ierr);
444 
445   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
446   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt2); CHKERRQ(ierr);
447 
448   ierr = PetscFree(df->uv); CHKERRQ(ierr);
449   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->uv); CHKERRQ(ierr);
450   PetscFunctionReturn(0);
451 }
452 
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "destroy_df_solver"
456 PetscErrorCode destroy_df_solver(TAO_DF *df)
457 {
458   PetscErrorCode ierr;
459   PetscInt       i;
460 
461   PetscFunctionBegin;
462   ierr = PetscFree(df->f); CHKERRQ(ierr);
463   ierr = PetscFree(df->a); CHKERRQ(ierr);
464   ierr = PetscFree(df->l); CHKERRQ(ierr);
465   ierr = PetscFree(df->u); CHKERRQ(ierr);
466   ierr = PetscFree(df->x); CHKERRQ(ierr);
467 
468   for (i = 0; i < df->cur_num_cp; i ++) {
469     ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
470   }
471   ierr = PetscFree(df->Q); CHKERRQ(ierr);
472   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
473   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
474   ierr = PetscFree(df->uv); CHKERRQ(ierr);
475   ierr = PetscFree(df->g); CHKERRQ(ierr);
476   ierr = PetscFree(df->y); CHKERRQ(ierr);
477   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
478   ierr = PetscFree(df->d); CHKERRQ(ierr);
479   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
480   ierr = PetscFree(df->t); CHKERRQ(ierr);
481   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
482   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
483   ierr = PetscFree(df->sk); CHKERRQ(ierr);
484   ierr = PetscFree(df->yk); CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 
488 
489 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
490 #undef __FUNCT__
491 #define __FUNCT__ "phi"
492 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
493 {
494   PetscReal r = 0.0;
495   PetscInt  i;
496 
497   for (i = 0; i < n; i++){
498     x[i] = -c[i] + lambda*a[i];
499     if (x[i] > u[i])     x[i] = u[i];
500     else if(x[i] < l[i]) x[i] = l[i];
501     r += a[i]*x[i];
502   }
503   return r - b;
504 }
505 
506 /** Modified Dai-Fletcher QP projector solves the problem:
507  *
508  *      minimise  0.5*x'*x - c'*x
509  *      subj to   a'*x = b
510  *                l \leq x \leq u
511  *
512  *  \param c The point to be projected onto feasible set
513  */
514 #undef __FUNCT__
515 #define __FUNCT__ "project"
516 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
517 {
518   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
519   PetscReal      r, rl, ru, s;
520   PetscInt       innerIter;
521   PetscBool      nonNegativeSlack = PETSC_FALSE;
522   PetscErrorCode ierr;
523 
524   *lam_ext = 0;
525   lambda  = 0;
526   dlambda = 0.5;
527   innerIter = 1;
528 
529   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
530    *
531    *  Optimality conditions for \phi:
532    *
533    *  1. lambda   <= 0
534    *  2. r        <= 0
535    *  3. r*lambda == 0
536    */
537 
538   /* Bracketing Phase */
539   r = phi(x, n, lambda, a, b, c, l, u);
540 
541   if(nonNegativeSlack) {
542     /* inequality constraint, i.e., with \xi >= 0 constraint */
543     if (r < TOL_R) return 0;
544   } else  {
545     /* equality constraint ,i.e., without \xi >= 0 constraint */
546     if (fabs(r) < TOL_R) return 0;
547   }
548 
549   if (r < 0.0){
550     lambdal = lambda;
551     rl      = r;
552     lambda  = lambda + dlambda;
553     r       = phi(x, n, lambda, a, b, c, l, u);
554     while (r < 0.0 && dlambda < BMRM_INFTY)  {
555       lambdal = lambda;
556       s       = rl/r - 1.0;
557       if (s < 0.1) s = 0.1;
558       dlambda = dlambda + dlambda/s;
559       lambda  = lambda + dlambda;
560       rl      = r;
561       r       = phi(x, n, lambda, a, b, c, l, u);
562     }
563     lambdau = lambda;
564     ru      = r;
565   } else {
566     lambdau = lambda;
567     ru      = r;
568     lambda  = lambda - dlambda;
569     r       = phi(x, n, lambda, a, b, c, l, u);
570     while (r > 0.0 && dlambda > -BMRM_INFTY) {
571       lambdau = lambda;
572       s       = ru/r - 1.0;
573       if (s < 0.1) s = 0.1;
574       dlambda = dlambda + dlambda/s;
575       lambda  = lambda - dlambda;
576       ru      = r;
577       r       = phi(x, n, lambda, a, b, c, l, u);
578     }
579     lambdal = lambda;
580     rl      = r;
581   }
582 
583   if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
584 
585   if(ru == 0){
586     lambda = lambdau;
587     r = phi(x, n, lambda, a, b, c, l, u);
588     return innerIter;
589   }
590 
591   /* Secant Phase */
592   s       = 1.0 - rl/ru;
593   dlambda = dlambda/s;
594   lambda  = lambdau - dlambda;
595   r       = phi(x, n, lambda, a, b, c, l, u);
596 
597   while (fabs(r) > TOL_R
598          && dlambda > TOL_LAM * (1.0 + fabs(lambda))
599          && innerIter < df->maxProjIter){
600     innerIter++;
601     if (r > 0.0){
602       if (s <= 2.0){
603         lambdau = lambda;
604         ru      = r;
605         s       = 1.0 - rl/ru;
606         dlambda = (lambdau - lambdal) / s;
607         lambda  = lambdau - dlambda;
608       } else {
609         s          = ru/r-1.0;
610         if (s < 0.1) s = 0.1;
611         dlambda    = (lambdau - lambda) / s;
612         lambda_new = 0.75*lambdal + 0.25*lambda;
613         if (lambda_new < (lambda - dlambda))
614           lambda_new = lambda - dlambda;
615         lambdau    = lambda;
616         ru         = r;
617         lambda     = lambda_new;
618         s          = (lambdau - lambdal) / (lambdau - lambda);
619       }
620     } else {
621       if (s >= 2.0){
622         lambdal = lambda;
623         rl      = r;
624         s       = 1.0 - rl/ru;
625         dlambda = (lambdau - lambdal) / s;
626         lambda  = lambdau - dlambda;
627       } else {
628         s          = rl/r - 1.0;
629         if (s < 0.1) s = 0.1;
630         dlambda    = (lambda-lambdal) / s;
631         lambda_new = 0.75*lambdau + 0.25*lambda;
632         if (lambda_new > (lambda + dlambda))
633           lambda_new = lambda + dlambda;
634         lambdal    = lambda;
635         rl         = r;
636         lambda     = lambda_new;
637         s          = (lambdau - lambdal) / (lambdau-lambda);
638       }
639     }
640     r = phi(x, n, lambda, a, b, c, l, u);
641   }
642 
643   *lam_ext = lambda;
644   if(innerIter >= df->maxProjIter) {
645     ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
646   }
647   return innerIter;
648 }
649 
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "solve"
653 PetscErrorCode solve(TAO_DF *df)
654 {
655   PetscErrorCode ierr;
656   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
657   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
658   PetscReal      DELTAsv, ProdDELTAsv;
659   PetscReal      c, *tempQ;
660   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
661   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
662   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
663   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
664   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
665 
666   /*** variables for the adaptive nonmonotone linesearch ***/
667   PetscInt    L, llast;
668   PetscReal   fr, fbest, fv, fc, fv0;
669 
670   c = BMRM_INFTY;
671 
672   DELTAsv = EPS_SV;
673   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
674   else  ProdDELTAsv = EPS_SV;
675 
676   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
677 
678   lam_ext = 0.0;
679 
680   /* Project the initial solution */
681   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
682 
683   /* Compute gradient
684      g = Q*x + f; */
685 
686   it = 0;
687   for (i = 0; i < dim; i++) {
688     if (fabs(x[i]) > ProdDELTAsv) ipt[it++] = i;
689   }
690 
691   ierr = PetscMemzero(t, dim*sizeof(PetscReal)); CHKERRQ(ierr);
692   for (i = 0; i < it; i++){
693     tempQ = Q[ipt[i]];
694     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
695   }
696   for (i = 0; i < dim; i++){
697     g[i] = t[i] + f[i];
698   }
699 
700 
701   /* y = -(x_{k} - g_{k}) */
702   for (i = 0; i < dim; i++){
703     y[i] = g[i] - x[i];
704   }
705 
706   /* Project x_{k} - g_{k} */
707   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
708 
709   /* y = P(x_{k} - g_{k}) - x_{k} */
710   max = ALPHA_MIN;
711   for (i = 0; i < dim; i++){
712     y[i] = tempv[i] - x[i];
713     if (fabs(y[i]) > max) max = fabs(y[i]);
714   }
715 
716   if (max < tol*1e-3){
717     lscount = 0;
718     innerIter    = 0;
719     return 0;
720   }
721 
722   alpha = 1.0 / max;
723 
724   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
725   fv0   = 0.0;
726   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
727 
728   /*** adaptive nonmonotone linesearch ***/
729   L     = 2;
730   fr    = ALPHA_MAX;
731   fbest = fv0;
732   fc    = fv0;
733   llast = 0;
734   akold = bkold = 0.0;
735 
736   /***      Iterator begins     ***/
737   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
738 
739     /* tempv = -(x_{k} - alpha*g_{k}) */
740     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
741 
742     /* Project x_{k} - alpha*g_{k} */
743     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
744 
745 
746     /* gd = \inner{d_{k}}{g_{k}}
747         d = P(x_{k} - alpha*g_{k}) - x_{k}
748     */
749     gd = 0.0;
750     for (i = 0; i < dim; i++){
751       d[i] = y[i] - x[i];
752       gd  += d[i] * g[i];
753     }
754 
755     /* Gradient computation  */
756 
757     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
758 
759     it = it2 = 0;
760     for (i = 0; i < dim; i++){
761       if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
762     }
763     for (i = 0; i < dim; i++) {
764       if (fabs(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
765     }
766 
767     ierr = PetscMemzero(Qd, dim*sizeof(PetscReal)); CHKERRQ(ierr);
768     /* compute Qd = Q*d */
769     if (it < it2){
770       for (i = 0; i < it; i++){
771         tempQ = Q[ipt[i]];
772         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
773       }
774     } else { /* compute Qd = Q*y-t */
775       for (i = 0; i < it2; i++){
776         tempQ = Q[ipt2[i]];
777         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
778       }
779       for (j = 0; j < dim; j++) Qd[j] -= t[j];
780     }
781 
782     /* ak = inner{d_{k}}{d_{k}} */
783     ak = 0.0;
784     for (i = 0; i < dim; i++) ak += d[i] * d[i];
785 
786     bk = 0.0;
787     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
788 
789     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
790     else lamnew = 1.0;
791 
792     /* fv is computing f(x_{k} + d_{k}) */
793     fv = 0.0;
794     for (i = 0; i < dim; i++){
795       xplus[i] = x[i] + d[i];
796       tplus[i] = t[i] + Qd[i];
797       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
798     }
799 
800     /* fr is fref */
801     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
802       lscount++;
803       fv = 0.0;
804       for (i = 0; i < dim; i++){
805         xplus[i] = x[i] + lamnew*d[i];
806         tplus[i] = t[i] + lamnew*Qd[i];
807         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
808       }
809     }
810 
811     for (i = 0; i < dim; i++){
812       sk[i] = xplus[i] - x[i];
813       yk[i] = tplus[i] - t[i];
814       x[i]  = xplus[i];
815       t[i]  = tplus[i];
816       g[i]  = t[i] + f[i];
817     }
818 
819     /* update the line search control parameters */
820     if (fv < fbest){
821       fbest = fv;
822       fc    = fv;
823       llast = 0;
824     } else {
825       fc = (fc > fv ? fc : fv);
826       llast++;
827       if (llast == L){
828         fr    = fc;
829         fc    = fv;
830         llast = 0;
831       }
832     }
833 
834     ak = bk = 0.0;
835     for (i = 0; i < dim; i++){
836       ak += sk[i] * sk[i];
837       bk += sk[i] * yk[i];
838     }
839 
840     if (bk <= EPS*ak) alpha = ALPHA_MAX;
841     else {
842       if (bkold < EPS*akold) alpha = ak/bk;
843       else alpha = (akold+ak)/(bkold+bk);
844 
845       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
846       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
847     }
848 
849     akold = ak;
850     bkold = bk;
851 
852     /*** stopping criterion based on KKT conditions ***/
853     /* at optimal, gradient of lagrangian w.r.t. x is zero */
854 
855     bk = 0.0;
856     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
857 
858     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
859       it     = 0;
860       luv    = 0;
861       kktlam = 0.0;
862       for (i = 0; i < dim; i++){
863         /* x[i] is active hence lagrange multipliers for box constraints
864                 are zero. The lagrange multiplier for ineq. const. is then
865                 defined as below
866         */
867         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
868           ipt[it++] = i;
869           kktlam    = kktlam - a[i]*g[i];
870         } else  uv[luv++] = i;
871       }
872 
873       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
874       else {
875         kktlam = kktlam/it;
876         info   = 1;
877         for (i = 0; i < it; i++) {
878           if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
879             info = 0;
880             break;
881           }
882         }
883         if (info == 1)  {
884           for (i = 0; i < luv; i++)  {
885             if (x[uv[i]] <= DELTAsv){
886               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
887                      not be zero. So, the gradient without beta is > 0
888               */
889               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
890                 info = 0;
891                 break;
892               }
893             } else {
894               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
895                      not be zero. So, the gradient without eta is < 0
896               */
897               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
898                 info = 0;
899                 break;
900               }
901             }
902           }
903         }
904 
905         if (info == 1) return 0;
906       }
907     }
908   }
909   return 0;
910 }
911 
912 
913