xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision b8a3fda2eec00bbbfad0cf0db17d1d1f04b07f29)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
18 */
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "make_grad_node"
22 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = PetscNew(p);CHKERRQ(ierr);
28   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
29   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
30   (*p)->next = NULL;
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "destroy_grad_list"
36 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
37 {
38   PetscErrorCode ierr;
39   Vec_Chain      *p = head->next, *q;
40 
41   PetscFunctionBegin;
42   while(p) {
43     q = p->next;
44     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
45     ierr = PetscFree(p);CHKERRQ(ierr);
46     p = q;
47   }
48   head->next = NULL;
49   PetscFunctionReturn(0);
50 }
51 
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "TaoSolve_BMRM"
55 static PetscErrorCode TaoSolve_BMRM(Tao tao)
56 {
57   PetscErrorCode     ierr;
58   TaoConvergedReason reason;
59   TAO_DF             df;
60   TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;
61 
62   /* Values and pointers to parts of the optimization problem */
63   PetscReal          f = 0.0;
64   Vec                W = tao->solution;
65   Vec                G = tao->gradient;
66   PetscReal          lambda;
67   PetscReal          bt;
68   Vec_Chain          grad_list, *tail_glist, *pgrad;
69   PetscInt           i;
70   PetscMPIInt        rank;
71 
72   /* Used in converged criteria check */
73   PetscReal          reg;
74   PetscReal          jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
75   PetscReal          innerSolverTol;
76   MPI_Comm           comm;
77 
78   PetscFunctionBegin;
79   ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
80   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
81   lambda = bmrm->lambda;
82 
83   /* Check Stopping Condition */
84   tao->step = 1.0;
85   max_jtwt = -BMRM_INFTY;
86   min_jw = BMRM_INFTY;
87   innerSolverTol = 1.0;
88   epsilon = 0.0;
89 
90   if (!rank) {
91     ierr = init_df_solver(&df);CHKERRQ(ierr);
92     grad_list.next = NULL;
93     tail_glist = &grad_list;
94   }
95 
96   df.tol = 1e-6;
97   reason = TAO_CONTINUE_ITERATING;
98 
99   /*-----------------Algorithm Begins------------------------*/
100   /* make the scatter */
101   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
102   ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
103   ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);
104 
105   /* NOTE: In application pass the sub-gradient of Remp(W) */
106   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
107   ierr = TaoMonitor(tao,tao->niter,f,1.0,0.0,tao->step,&reason);CHKERRQ(ierr);
108   while (reason == TAO_CONTINUE_ITERATING) {
109     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
110     ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
111     bt = f - bt;
112 
113     /* First gather the gradient to the master node */
114     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
115     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
116 
117     /* Bring up the inner solver */
118     if (!rank) {
119       ierr = ensure_df_space(tao->niter+1, &df); CHKERRQ(ierr);
120       ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
121       tail_glist->next = pgrad;
122       tail_glist = pgrad;
123 
124       df.a[tao->niter] = 1.0;
125       df.f[tao->niter] = -bt;
126       df.u[tao->niter] = 1.0;
127       df.l[tao->niter] = 0.0;
128 
129       /* set up the Q */
130       pgrad = grad_list.next;
131       for (i=0; i<=tao->niter; i++) {
132         ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
133         df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
134         pgrad = pgrad->next;
135       }
136 
137       if (tao->niter > 0) {
138         df.x[tao->niter] = 0.0;
139         ierr = solve(&df); CHKERRQ(ierr);
140       } else
141         df.x[0] = 1.0;
142 
143       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
144       jtwt = 0.0;
145       ierr = VecSet(bmrm->local_w, 0.0); CHKERRQ(ierr);
146       pgrad = grad_list.next;
147       for (i=0; i<=tao->niter; i++) {
148         jtwt -= df.x[i] * df.f[i];
149         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V); CHKERRQ(ierr);
150         pgrad = pgrad->next;
151       }
152 
153       ierr = VecNorm(bmrm->local_w, NORM_2, &reg); CHKERRQ(ierr);
154       reg = 0.5*lambda*reg*reg;
155       jtwt -= reg;
156     } /* end if rank == 0 */
157 
158     /* scatter the new W to all nodes */
159     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
160     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
161 
162     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
163 
164     ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
165     ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
166 
167     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
168     if (jw < min_jw) min_jw = jw;
169     if (jtwt > max_jtwt) max_jtwt = jtwt;
170 
171     pre_epsilon = epsilon;
172     epsilon = min_jw - jtwt;
173 
174     if (!rank) {
175       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
176       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
177 
178       /* if the annealing doesn't work well, lower the inner solver tolerance */
179       if(pre_epsilon < epsilon) innerSolverTol *= 0.2;
180 
181       df.tol = innerSolverTol*0.5;
182     }
183 
184     tao->niter++;
185     ierr = TaoMonitor(tao,tao->niter,min_jw,epsilon,0.0,tao->step,&reason);CHKERRQ(ierr);
186   }
187 
188   /* free all the memory */
189   if (!rank) {
190     ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
191     ierr = destroy_df_solver(&df);CHKERRQ(ierr);
192   }
193 
194   ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
195   ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
196   PetscFunctionReturn(0);
197 }
198 
199 
200 /* ---------------------------------------------------------- */
201 
202 #undef __FUNCT__
203 #define __FUNCT__ "TaoSetup_BMRM"
204 static PetscErrorCode TaoSetup_BMRM(Tao tao)
205 {
206 
207   PetscErrorCode ierr;
208 
209   PetscFunctionBegin;
210   /* Allocate some arrays */
211   if (!tao->gradient) {
212     ierr = VecDuplicate(tao->solution, &tao->gradient);   CHKERRQ(ierr);
213   }
214   PetscFunctionReturn(0);
215 }
216 
217 /*------------------------------------------------------------*/
218 #undef __FUNCT__
219 #define __FUNCT__ "TaoDestroy_BMRM"
220 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
221 {
222   PetscErrorCode ierr;
223 
224   PetscFunctionBegin;
225   ierr = PetscFree(tao->data);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 #undef __FUNCT__
230 #define __FUNCT__ "TaoSetFromOptions_BMRM"
231 static PetscErrorCode TaoSetFromOptions_BMRM(PetscOptionItems *PetscOptionsObject,Tao tao)
232 {
233   PetscErrorCode ierr;
234   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
235 
236   PetscFunctionBegin;
237   ierr = PetscOptionsHead(PetscOptionsObject,"BMRM for regularized risk minimization");CHKERRQ(ierr);
238   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,NULL); CHKERRQ(ierr);
239   ierr = PetscOptionsTail();CHKERRQ(ierr);
240   PetscFunctionReturn(0);
241 }
242 
243 /*------------------------------------------------------------*/
244 #undef __FUNCT__
245 #define __FUNCT__ "TaoView_BMRM"
246 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
247 {
248   PetscBool      isascii;
249   PetscErrorCode ierr;
250 
251   PetscFunctionBegin;
252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
253   if (isascii) {
254     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
255     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 /*------------------------------------------------------------*/
261 /*MC
262   TAOBMRM - bundle method for regularized risk minimization
263 
264   Options Database Keys:
265 . - tao_bmrm_lambda - regulariser weight
266 
267   Level: beginner
268 M*/
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "TaoCreate_BMRM"
272 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
273 {
274   TAO_BMRM       *bmrm;
275   PetscErrorCode ierr;
276 
277   PetscFunctionBegin;
278   tao->ops->setup = TaoSetup_BMRM;
279   tao->ops->solve = TaoSolve_BMRM;
280   tao->ops->view  = TaoView_BMRM;
281   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
282   tao->ops->destroy = TaoDestroy_BMRM;
283 
284   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
285   bmrm->lambda = 1.0;
286   tao->data = (void*)bmrm;
287 
288   /* Override default settings (unless already changed) */
289   if (!tao->max_it_changed) tao->max_it = 2000;
290   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
291   if (!tao->gatol_changed) tao->gatol = 1.0e-12;
292   if (!tao->grtol_changed) tao->grtol = 1.0e-12;
293 
294   PetscFunctionReturn(0);
295 }
296 
297 #undef __FUNCT__
298 #define __FUNCT__ "init_df_solver"
299 PetscErrorCode init_df_solver(TAO_DF *df)
300 {
301   PetscInt       i, n = INCRE_DIM;
302   PetscErrorCode ierr;
303 
304   PetscFunctionBegin;
305   /* default values */
306   df->maxProjIter = 200;
307   df->maxPGMIter = 300000;
308   df->b = 1.0;
309 
310   /* memory space required by Dai-Fletcher */
311   df->cur_num_cp = n;
312   ierr = PetscMalloc1(n, &df->f); CHKERRQ(ierr);
313   ierr = PetscMalloc1(n, &df->a); CHKERRQ(ierr);
314   ierr = PetscMalloc1(n, &df->l); CHKERRQ(ierr);
315   ierr = PetscMalloc1(n, &df->u); CHKERRQ(ierr);
316   ierr = PetscMalloc1(n, &df->x); CHKERRQ(ierr);
317   ierr = PetscMalloc1(n, &df->Q); CHKERRQ(ierr);
318 
319   for (i = 0; i < n; i ++) {
320     ierr = PetscMalloc1(n, &df->Q[i]); CHKERRQ(ierr);
321   }
322 
323   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
324   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
325   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
326   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
327   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
328   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
329   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
330   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
331   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
332   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
333 
334   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
335   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
336   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 
340 #undef __FUNCT__
341 #define __FUNCT__ "ensure_df_space"
342 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
343 {
344   PetscErrorCode ierr;
345   PetscReal      *tmp, **tmp_Q;
346   PetscInt       i, n, old_n;
347 
348   PetscFunctionBegin;
349   df->dim = dim;
350   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
351 
352   old_n = df->cur_num_cp;
353   df->cur_num_cp += INCRE_DIM;
354   n = df->cur_num_cp;
355 
356   /* memory space required by dai-fletcher */
357   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
358   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
359   ierr = PetscFree(df->f); CHKERRQ(ierr);
360   df->f = tmp;
361 
362   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
363   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
364   ierr = PetscFree(df->a); CHKERRQ(ierr);
365   df->a = tmp;
366 
367   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
368   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
369   ierr = PetscFree(df->l); CHKERRQ(ierr);
370   df->l = tmp;
371 
372   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
373   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
374   ierr = PetscFree(df->u); CHKERRQ(ierr);
375   df->u = tmp;
376 
377   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
378   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
379   ierr = PetscFree(df->x); CHKERRQ(ierr);
380   df->x = tmp;
381 
382   ierr = PetscMalloc1(n, &tmp_Q); CHKERRQ(ierr);
383   for (i = 0; i < n; i ++) {
384     ierr = PetscMalloc1(n, &tmp_Q[i]); CHKERRQ(ierr);
385     if (i < old_n) {
386       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n); CHKERRQ(ierr);
387       ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
388     }
389   }
390 
391   ierr = PetscFree(df->Q); CHKERRQ(ierr);
392   df->Q = tmp_Q;
393 
394   ierr = PetscFree(df->g); CHKERRQ(ierr);
395   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
396 
397   ierr = PetscFree(df->y); CHKERRQ(ierr);
398   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
399 
400   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
401   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
402 
403   ierr = PetscFree(df->d); CHKERRQ(ierr);
404   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
405 
406   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
407   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
408 
409   ierr = PetscFree(df->t); CHKERRQ(ierr);
410   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
411 
412   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
413   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
414 
415   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
416   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
417 
418   ierr = PetscFree(df->sk); CHKERRQ(ierr);
419   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
420 
421   ierr = PetscFree(df->yk); CHKERRQ(ierr);
422   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
423 
424   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
425   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
426 
427   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
428   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
429 
430   ierr = PetscFree(df->uv); CHKERRQ(ierr);
431   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
432   PetscFunctionReturn(0);
433 }
434 
435 #undef __FUNCT__
436 #define __FUNCT__ "destroy_df_solver"
437 PetscErrorCode destroy_df_solver(TAO_DF *df)
438 {
439   PetscErrorCode ierr;
440   PetscInt       i;
441 
442   PetscFunctionBegin;
443   ierr = PetscFree(df->f); CHKERRQ(ierr);
444   ierr = PetscFree(df->a); CHKERRQ(ierr);
445   ierr = PetscFree(df->l); CHKERRQ(ierr);
446   ierr = PetscFree(df->u); CHKERRQ(ierr);
447   ierr = PetscFree(df->x); CHKERRQ(ierr);
448 
449   for (i = 0; i < df->cur_num_cp; i ++) {
450     ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
451   }
452   ierr = PetscFree(df->Q); CHKERRQ(ierr);
453   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
454   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
455   ierr = PetscFree(df->uv); CHKERRQ(ierr);
456   ierr = PetscFree(df->g); CHKERRQ(ierr);
457   ierr = PetscFree(df->y); CHKERRQ(ierr);
458   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
459   ierr = PetscFree(df->d); CHKERRQ(ierr);
460   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
461   ierr = PetscFree(df->t); CHKERRQ(ierr);
462   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
463   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
464   ierr = PetscFree(df->sk); CHKERRQ(ierr);
465   ierr = PetscFree(df->yk); CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
470 #undef __FUNCT__
471 #define __FUNCT__ "phi"
472 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
473 {
474   PetscReal r = 0.0;
475   PetscInt  i;
476 
477   for (i = 0; i < n; i++){
478     x[i] = -c[i] + lambda*a[i];
479     if (x[i] > u[i])     x[i] = u[i];
480     else if(x[i] < l[i]) x[i] = l[i];
481     r += a[i]*x[i];
482   }
483   return r - b;
484 }
485 
486 /** Modified Dai-Fletcher QP projector solves the problem:
487  *
488  *      minimise  0.5*x'*x - c'*x
489  *      subj to   a'*x = b
490  *                l \leq x \leq u
491  *
492  *  \param c The point to be projected onto feasible set
493  */
494 #undef __FUNCT__
495 #define __FUNCT__ "project"
496 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
497 {
498   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
499   PetscReal      r, rl, ru, s;
500   PetscInt       innerIter;
501   PetscBool      nonNegativeSlack = PETSC_FALSE;
502   PetscErrorCode ierr;
503 
504   *lam_ext = 0;
505   lambda  = 0;
506   dlambda = 0.5;
507   innerIter = 1;
508 
509   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
510    *
511    *  Optimality conditions for \phi:
512    *
513    *  1. lambda   <= 0
514    *  2. r        <= 0
515    *  3. r*lambda == 0
516    */
517 
518   /* Bracketing Phase */
519   r = phi(x, n, lambda, a, b, c, l, u);
520 
521   if(nonNegativeSlack) {
522     /* inequality constraint, i.e., with \xi >= 0 constraint */
523     if (r < TOL_R) return 0;
524   } else  {
525     /* equality constraint ,i.e., without \xi >= 0 constraint */
526     if (fabs(r) < TOL_R) return 0;
527   }
528 
529   if (r < 0.0){
530     lambdal = lambda;
531     rl      = r;
532     lambda  = lambda + dlambda;
533     r       = phi(x, n, lambda, a, b, c, l, u);
534     while (r < 0.0 && dlambda < BMRM_INFTY)  {
535       lambdal = lambda;
536       s       = rl/r - 1.0;
537       if (s < 0.1) s = 0.1;
538       dlambda = dlambda + dlambda/s;
539       lambda  = lambda + dlambda;
540       rl      = r;
541       r       = phi(x, n, lambda, a, b, c, l, u);
542     }
543     lambdau = lambda;
544     ru      = r;
545   } else {
546     lambdau = lambda;
547     ru      = r;
548     lambda  = lambda - dlambda;
549     r       = phi(x, n, lambda, a, b, c, l, u);
550     while (r > 0.0 && dlambda > -BMRM_INFTY) {
551       lambdau = lambda;
552       s       = ru/r - 1.0;
553       if (s < 0.1) s = 0.1;
554       dlambda = dlambda + dlambda/s;
555       lambda  = lambda - dlambda;
556       ru      = r;
557       r       = phi(x, n, lambda, a, b, c, l, u);
558     }
559     lambdal = lambda;
560     rl      = r;
561   }
562 
563   if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
564 
565   if(ru == 0){
566     return innerIter;
567   }
568 
569   /* Secant Phase */
570   s       = 1.0 - rl/ru;
571   dlambda = dlambda/s;
572   lambda  = lambdau - dlambda;
573   r       = phi(x, n, lambda, a, b, c, l, u);
574 
575   while (fabs(r) > TOL_R
576          && dlambda > TOL_LAM * (1.0 + fabs(lambda))
577          && innerIter < df->maxProjIter){
578     innerIter++;
579     if (r > 0.0){
580       if (s <= 2.0){
581         lambdau = lambda;
582         ru      = r;
583         s       = 1.0 - rl/ru;
584         dlambda = (lambdau - lambdal) / s;
585         lambda  = lambdau - dlambda;
586       } else {
587         s          = ru/r-1.0;
588         if (s < 0.1) s = 0.1;
589         dlambda    = (lambdau - lambda) / s;
590         lambda_new = 0.75*lambdal + 0.25*lambda;
591         if (lambda_new < (lambda - dlambda))
592           lambda_new = lambda - dlambda;
593         lambdau    = lambda;
594         ru         = r;
595         lambda     = lambda_new;
596         s          = (lambdau - lambdal) / (lambdau - lambda);
597       }
598     } else {
599       if (s >= 2.0){
600         lambdal = lambda;
601         rl      = r;
602         s       = 1.0 - rl/ru;
603         dlambda = (lambdau - lambdal) / s;
604         lambda  = lambdau - dlambda;
605       } else {
606         s          = rl/r - 1.0;
607         if (s < 0.1) s = 0.1;
608         dlambda    = (lambda-lambdal) / s;
609         lambda_new = 0.75*lambdau + 0.25*lambda;
610         if (lambda_new > (lambda + dlambda))
611           lambda_new = lambda + dlambda;
612         lambdal    = lambda;
613         rl         = r;
614         lambda     = lambda_new;
615         s          = (lambdau - lambdal) / (lambdau-lambda);
616       }
617     }
618     r = phi(x, n, lambda, a, b, c, l, u);
619   }
620 
621   *lam_ext = lambda;
622   if(innerIter >= df->maxProjIter) {
623     ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
624   }
625   return innerIter;
626 }
627 
628 
629 #undef __FUNCT__
630 #define __FUNCT__ "solve"
631 PetscErrorCode solve(TAO_DF *df)
632 {
633   PetscErrorCode ierr;
634   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
635   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
636   PetscReal      DELTAsv, ProdDELTAsv;
637   PetscReal      c, *tempQ;
638   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
639   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
640   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
641   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
642   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
643 
644   /*** variables for the adaptive nonmonotone linesearch ***/
645   PetscInt    L, llast;
646   PetscReal   fr, fbest, fv, fc, fv0;
647 
648   c = BMRM_INFTY;
649 
650   DELTAsv = EPS_SV;
651   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
652   else  ProdDELTAsv = EPS_SV;
653 
654   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
655 
656   lam_ext = 0.0;
657 
658   /* Project the initial solution */
659   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
660 
661   /* Compute gradient
662      g = Q*x + f; */
663 
664   it = 0;
665   for (i = 0; i < dim; i++) {
666     if (fabs(x[i]) > ProdDELTAsv) ipt[it++] = i;
667   }
668 
669   ierr = PetscMemzero(t, dim*sizeof(PetscReal)); CHKERRQ(ierr);
670   for (i = 0; i < it; i++){
671     tempQ = Q[ipt[i]];
672     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
673   }
674   for (i = 0; i < dim; i++){
675     g[i] = t[i] + f[i];
676   }
677 
678 
679   /* y = -(x_{k} - g_{k}) */
680   for (i = 0; i < dim; i++){
681     y[i] = g[i] - x[i];
682   }
683 
684   /* Project x_{k} - g_{k} */
685   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
686 
687   /* y = P(x_{k} - g_{k}) - x_{k} */
688   max = ALPHA_MIN;
689   for (i = 0; i < dim; i++){
690     y[i] = tempv[i] - x[i];
691     if (fabs(y[i]) > max) max = fabs(y[i]);
692   }
693 
694   if (max < tol*1e-3){
695     return 0;
696   }
697 
698   alpha = 1.0 / max;
699 
700   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
701   fv0   = 0.0;
702   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
703 
704   /*** adaptive nonmonotone linesearch ***/
705   L     = 2;
706   fr    = ALPHA_MAX;
707   fbest = fv0;
708   fc    = fv0;
709   llast = 0;
710   akold = bkold = 0.0;
711 
712   /***      Iterator begins     ***/
713   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
714 
715     /* tempv = -(x_{k} - alpha*g_{k}) */
716     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
717 
718     /* Project x_{k} - alpha*g_{k} */
719     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
720 
721 
722     /* gd = \inner{d_{k}}{g_{k}}
723         d = P(x_{k} - alpha*g_{k}) - x_{k}
724     */
725     gd = 0.0;
726     for (i = 0; i < dim; i++){
727       d[i] = y[i] - x[i];
728       gd  += d[i] * g[i];
729     }
730 
731     /* Gradient computation  */
732 
733     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
734 
735     it = it2 = 0;
736     for (i = 0; i < dim; i++){
737       if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
738     }
739     for (i = 0; i < dim; i++) {
740       if (fabs(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
741     }
742 
743     ierr = PetscMemzero(Qd, dim*sizeof(PetscReal)); CHKERRQ(ierr);
744     /* compute Qd = Q*d */
745     if (it < it2){
746       for (i = 0; i < it; i++){
747         tempQ = Q[ipt[i]];
748         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
749       }
750     } else { /* compute Qd = Q*y-t */
751       for (i = 0; i < it2; i++){
752         tempQ = Q[ipt2[i]];
753         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
754       }
755       for (j = 0; j < dim; j++) Qd[j] -= t[j];
756     }
757 
758     /* ak = inner{d_{k}}{d_{k}} */
759     ak = 0.0;
760     for (i = 0; i < dim; i++) ak += d[i] * d[i];
761 
762     bk = 0.0;
763     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
764 
765     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
766     else lamnew = 1.0;
767 
768     /* fv is computing f(x_{k} + d_{k}) */
769     fv = 0.0;
770     for (i = 0; i < dim; i++){
771       xplus[i] = x[i] + d[i];
772       tplus[i] = t[i] + Qd[i];
773       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
774     }
775 
776     /* fr is fref */
777     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
778       lscount++;
779       fv = 0.0;
780       for (i = 0; i < dim; i++){
781         xplus[i] = x[i] + lamnew*d[i];
782         tplus[i] = t[i] + lamnew*Qd[i];
783         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
784       }
785     }
786 
787     for (i = 0; i < dim; i++){
788       sk[i] = xplus[i] - x[i];
789       yk[i] = tplus[i] - t[i];
790       x[i]  = xplus[i];
791       t[i]  = tplus[i];
792       g[i]  = t[i] + f[i];
793     }
794 
795     /* update the line search control parameters */
796     if (fv < fbest){
797       fbest = fv;
798       fc    = fv;
799       llast = 0;
800     } else {
801       fc = (fc > fv ? fc : fv);
802       llast++;
803       if (llast == L){
804         fr    = fc;
805         fc    = fv;
806         llast = 0;
807       }
808     }
809 
810     ak = bk = 0.0;
811     for (i = 0; i < dim; i++){
812       ak += sk[i] * sk[i];
813       bk += sk[i] * yk[i];
814     }
815 
816     if (bk <= EPS*ak) alpha = ALPHA_MAX;
817     else {
818       if (bkold < EPS*akold) alpha = ak/bk;
819       else alpha = (akold+ak)/(bkold+bk);
820 
821       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
822       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
823     }
824 
825     akold = ak;
826     bkold = bk;
827 
828     /*** stopping criterion based on KKT conditions ***/
829     /* at optimal, gradient of lagrangian w.r.t. x is zero */
830 
831     bk = 0.0;
832     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
833 
834     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
835       it     = 0;
836       luv    = 0;
837       kktlam = 0.0;
838       for (i = 0; i < dim; i++){
839         /* x[i] is active hence lagrange multipliers for box constraints
840                 are zero. The lagrange multiplier for ineq. const. is then
841                 defined as below
842         */
843         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
844           ipt[it++] = i;
845           kktlam    = kktlam - a[i]*g[i];
846         } else  uv[luv++] = i;
847       }
848 
849       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
850       else {
851         kktlam = kktlam/it;
852         info   = 1;
853         for (i = 0; i < it; i++) {
854           if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
855             info = 0;
856             break;
857           }
858         }
859         if (info == 1)  {
860           for (i = 0; i < luv; i++)  {
861             if (x[uv[i]] <= DELTAsv){
862               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
863                      not be zero. So, the gradient without beta is > 0
864               */
865               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
866                 info = 0;
867                 break;
868               }
869             } else {
870               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
871                      not be zero. So, the gradient without eta is < 0
872               */
873               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
874                 info = 0;
875                 break;
876               }
877             }
878           }
879         }
880 
881         if (info == 1) return 0;
882       }
883     }
884   }
885   return 0;
886 }
887 
888 
889