xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision f1e55ea32162cf9a365715cdae2babe91dbef974)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
18 */
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "make_grad_node"
22 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = PetscNew(p);CHKERRQ(ierr);
28   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
29   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
30   (*p)->next = NULL;
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "destroy_grad_list"
36 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
37 {
38   PetscErrorCode ierr;
39   Vec_Chain      *p = head->next, *q;
40 
41   PetscFunctionBegin;
42   while(p) {
43     q = p->next;
44     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
45     ierr = PetscFree(p);CHKERRQ(ierr);
46     p = q;
47   }
48   head->next = NULL;
49   PetscFunctionReturn(0);
50 }
51 
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "TaoSolve_BMRM"
55 static PetscErrorCode TaoSolve_BMRM(Tao tao)
56 {
57   PetscErrorCode     ierr;
58   TaoConvergedReason reason;
59   TAO_DF             df;
60   TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;
61 
62   /* Values and pointers to parts of the optimization problem */
63   PetscReal          f = 0.0;
64   Vec                W = tao->solution;
65   Vec                G = tao->gradient;
66   PetscReal          lambda;
67   PetscReal          bt;
68   Vec_Chain          grad_list, *tail_glist, *pgrad;
69   PetscInt           iter = 0;
70   PetscInt           i;
71   PetscMPIInt        rank;
72 
73   /* Used in converged criteria check */
74   PetscReal          reg;
75   PetscReal          jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
76   PetscReal          innerSolverTol;
77   MPI_Comm           comm;
78 
79   PetscFunctionBegin;
80   ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
81   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
82   lambda = bmrm->lambda;
83 
84   /* Check Stopping Condition */
85   tao->step = 1.0;
86   max_jtwt = -BMRM_INFTY;
87   min_jw = BMRM_INFTY;
88   innerSolverTol = 1.0;
89   epsilon = 0.0;
90 
91   if (!rank) {
92     ierr = init_df_solver(&df);CHKERRQ(ierr);
93     grad_list.next = NULL;
94     tail_glist = &grad_list;
95   }
96 
97   df.tol = 1e-6;
98   reason = TAO_CONTINUE_ITERATING;
99 
100   /*-----------------Algorithm Begins------------------------*/
101   /* make the scatter */
102   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
103   ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
104   ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);
105 
106   /* NOTE: In application pass the sub-gradient of Remp(W) */
107   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
108   ierr = TaoMonitor(tao,iter,f,1.0,0.0,tao->step,&reason);CHKERRQ(ierr);
109   while (reason == TAO_CONTINUE_ITERATING) {
110     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
111     ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
112     bt = f - bt;
113 
114     /* First gather the gradient to the master node */
115     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
116     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
117 
118     /* Bring up the inner solver */
119     if (!rank) {
120       ierr = ensure_df_space(iter+1, &df); CHKERRQ(ierr);
121       ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
122       tail_glist->next = pgrad;
123       tail_glist = pgrad;
124 
125       df.a[iter] = 1.0;
126       df.f[iter] = -bt;
127       df.u[iter] = 1.0;
128       df.l[iter] = 0.0;
129 
130       /* set up the Q */
131       pgrad = grad_list.next;
132       for (i=0; i<=iter; i++) {
133         ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
134         df.Q[i][iter] = df.Q[iter][i] = reg / lambda;
135         pgrad = pgrad->next;
136       }
137 
138       if (iter > 0) {
139         df.x[iter] = 0.0;
140         ierr = solve(&df); CHKERRQ(ierr);
141       } else
142         df.x[0] = 1.0;
143 
144       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
145       jtwt = 0.0;
146       ierr = VecSet(bmrm->local_w, 0.0); CHKERRQ(ierr);
147       pgrad = grad_list.next;
148       for (i=0; i<=iter; i++) {
149         jtwt -= df.x[i] * df.f[i];
150         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V); CHKERRQ(ierr);
151         pgrad = pgrad->next;
152       }
153 
154       ierr = VecNorm(bmrm->local_w, NORM_2, &reg); CHKERRQ(ierr);
155       reg = 0.5*lambda*reg*reg;
156       jtwt -= reg;
157     } /* end if rank == 0 */
158 
159     /* scatter the new W to all nodes */
160     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
161     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
162 
163     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
164 
165     ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
166     ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
167 
168     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
169     if (jw < min_jw) min_jw = jw;
170     if (jtwt > max_jtwt) max_jtwt = jtwt;
171 
172     pre_epsilon = epsilon;
173     epsilon = min_jw - jtwt;
174 
175     if (!rank) {
176       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
177       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
178 
179       /* if the annealing doesn't work well, lower the inner solver tolerance */
180       if(pre_epsilon < epsilon) innerSolverTol *= 0.2;
181 
182       df.tol = innerSolverTol*0.5;
183     }
184 
185     iter++;
186     ierr = TaoMonitor(tao,iter,min_jw,epsilon,0.0,tao->step,&reason);CHKERRQ(ierr);
187   }
188 
189   /* free all the memory */
190   if (!rank) {
191     ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
192     ierr = destroy_df_solver(&df);CHKERRQ(ierr);
193   }
194 
195   ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
196   ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
197   PetscFunctionReturn(0);
198 }
199 
200 
201 /* ---------------------------------------------------------- */
202 
203 #undef __FUNCT__
204 #define __FUNCT__ "TaoSetup_BMRM"
205 static PetscErrorCode TaoSetup_BMRM(Tao tao)
206 {
207 
208   PetscErrorCode ierr;
209 
210   PetscFunctionBegin;
211   /* Allocate some arrays */
212   if (!tao->gradient) {
213     ierr = VecDuplicate(tao->solution, &tao->gradient);   CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 /*------------------------------------------------------------*/
219 #undef __FUNCT__
220 #define __FUNCT__ "TaoDestroy_BMRM"
221 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
222 {
223   PetscErrorCode ierr;
224 
225   PetscFunctionBegin;
226   ierr = PetscFree(tao->data);CHKERRQ(ierr);
227   PetscFunctionReturn(0);
228 }
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "TaoSetFromOptions_BMRM"
232 static PetscErrorCode TaoSetFromOptions_BMRM(Tao tao)
233 {
234   PetscErrorCode ierr;
235   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
236   PetscBool      flg;
237 
238   PetscFunctionBegin;
239   ierr = PetscOptionsHead("BMRM for regularized risk minimization");CHKERRQ(ierr);
240   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,&flg); CHKERRQ(ierr);
241   ierr = PetscOptionsTail();CHKERRQ(ierr);
242   PetscFunctionReturn(0);
243 }
244 
245 /*------------------------------------------------------------*/
246 #undef __FUNCT__
247 #define __FUNCT__ "TaoView_BMRM"
248 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
249 {
250   PetscBool      isascii;
251   PetscErrorCode ierr;
252 
253   PetscFunctionBegin;
254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
255   if (isascii) {
256     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
257     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 /*------------------------------------------------------------*/
263 /*MC
264   TAOBMRM - bundle method for regularized risk minimization
265 
266   Options Database Keys:
267 . - tao_bmrm_lambda - regulariser weight
268 
269   Level: beginner
270 M*/
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "TaoCreate_BMRM"
274 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
275 {
276   TAO_BMRM       *bmrm;
277   PetscErrorCode ierr;
278 
279   PetscFunctionBegin;
280   tao->ops->setup = TaoSetup_BMRM;
281   tao->ops->solve = TaoSolve_BMRM;
282   tao->ops->view  = TaoView_BMRM;
283   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
284   tao->ops->destroy = TaoDestroy_BMRM;
285 
286   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
287   bmrm->lambda = 1.0;
288   tao->data = (void*)bmrm;
289 
290   /* Note: May need to be tuned! */
291   tao->max_it = 2048;
292   tao->max_funcs = 300000;
293   tao->fatol = 1e-20;
294   tao->frtol = 1e-25;
295   tao->gatol = 1e-25;
296   tao->grtol = 1e-25;
297 
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "init_df_solver"
303 PetscErrorCode init_df_solver(TAO_DF *df)
304 {
305   PetscInt       i, n = INCRE_DIM;
306   PetscErrorCode ierr;
307 
308   PetscFunctionBegin;
309   /* default values */
310   df->maxProjIter = 200;
311   df->maxPGMIter = 300000;
312   df->b = 1.0;
313 
314   /* memory space required by Dai-Fletcher */
315   df->cur_num_cp = n;
316   ierr = PetscMalloc1(n, &df->f); CHKERRQ(ierr);
317   ierr = PetscMalloc1(n, &df->a); CHKERRQ(ierr);
318   ierr = PetscMalloc1(n, &df->l); CHKERRQ(ierr);
319   ierr = PetscMalloc1(n, &df->u); CHKERRQ(ierr);
320   ierr = PetscMalloc1(n, &df->x); CHKERRQ(ierr);
321   ierr = PetscMalloc1(n, &df->Q); CHKERRQ(ierr);
322 
323   for (i = 0; i < n; i ++) {
324     ierr = PetscMalloc1(n, &df->Q[i]); CHKERRQ(ierr);
325   }
326 
327   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
328   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
329   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
330   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
331   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
332   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
333   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
334   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
335   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
336   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
337 
338   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
339   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
340   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "ensure_df_space"
346 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
347 {
348   PetscErrorCode ierr;
349   PetscReal      *tmp, **tmp_Q;
350   PetscInt       i, n, old_n;
351 
352   PetscFunctionBegin;
353   df->dim = dim;
354   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
355 
356   old_n = df->cur_num_cp;
357   df->cur_num_cp += INCRE_DIM;
358   n = df->cur_num_cp;
359 
360   /* memory space required by dai-fletcher */
361   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
362   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
363   ierr = PetscFree(df->f); CHKERRQ(ierr);
364   df->f = tmp;
365 
366   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
367   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
368   ierr = PetscFree(df->a); CHKERRQ(ierr);
369   df->a = tmp;
370 
371   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
372   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
373   ierr = PetscFree(df->l); CHKERRQ(ierr);
374   df->l = tmp;
375 
376   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
377   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
378   ierr = PetscFree(df->u); CHKERRQ(ierr);
379   df->u = tmp;
380 
381   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
382   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
383   ierr = PetscFree(df->x); CHKERRQ(ierr);
384   df->x = tmp;
385 
386   ierr = PetscMalloc1(n, &tmp_Q); CHKERRQ(ierr);
387   for (i = 0; i < n; i ++) {
388     ierr = PetscMalloc1(n, &tmp_Q[i]); CHKERRQ(ierr);
389     if (i < old_n) {
390       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n); CHKERRQ(ierr);
391       ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
392     }
393   }
394 
395   ierr = PetscFree(df->Q); CHKERRQ(ierr);
396   df->Q = tmp_Q;
397 
398   ierr = PetscFree(df->g); CHKERRQ(ierr);
399   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
400 
401   ierr = PetscFree(df->y); CHKERRQ(ierr);
402   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
403 
404   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
405   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
406 
407   ierr = PetscFree(df->d); CHKERRQ(ierr);
408   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
409 
410   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
411   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
412 
413   ierr = PetscFree(df->t); CHKERRQ(ierr);
414   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
415 
416   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
417   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
418 
419   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
420   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
421 
422   ierr = PetscFree(df->sk); CHKERRQ(ierr);
423   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
424 
425   ierr = PetscFree(df->yk); CHKERRQ(ierr);
426   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
427 
428   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
429   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
430 
431   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
432   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
433 
434   ierr = PetscFree(df->uv); CHKERRQ(ierr);
435   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
436   PetscFunctionReturn(0);
437 }
438 
439 #undef __FUNCT__
440 #define __FUNCT__ "destroy_df_solver"
441 PetscErrorCode destroy_df_solver(TAO_DF *df)
442 {
443   PetscErrorCode ierr;
444   PetscInt       i;
445 
446   PetscFunctionBegin;
447   ierr = PetscFree(df->f); CHKERRQ(ierr);
448   ierr = PetscFree(df->a); CHKERRQ(ierr);
449   ierr = PetscFree(df->l); CHKERRQ(ierr);
450   ierr = PetscFree(df->u); CHKERRQ(ierr);
451   ierr = PetscFree(df->x); CHKERRQ(ierr);
452 
453   for (i = 0; i < df->cur_num_cp; i ++) {
454     ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
455   }
456   ierr = PetscFree(df->Q); CHKERRQ(ierr);
457   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
458   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
459   ierr = PetscFree(df->uv); CHKERRQ(ierr);
460   ierr = PetscFree(df->g); CHKERRQ(ierr);
461   ierr = PetscFree(df->y); CHKERRQ(ierr);
462   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
463   ierr = PetscFree(df->d); CHKERRQ(ierr);
464   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
465   ierr = PetscFree(df->t); CHKERRQ(ierr);
466   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
467   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
468   ierr = PetscFree(df->sk); CHKERRQ(ierr);
469   ierr = PetscFree(df->yk); CHKERRQ(ierr);
470   PetscFunctionReturn(0);
471 }
472 
473 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
474 #undef __FUNCT__
475 #define __FUNCT__ "phi"
476 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
477 {
478   PetscReal r = 0.0;
479   PetscInt  i;
480 
481   for (i = 0; i < n; i++){
482     x[i] = -c[i] + lambda*a[i];
483     if (x[i] > u[i])     x[i] = u[i];
484     else if(x[i] < l[i]) x[i] = l[i];
485     r += a[i]*x[i];
486   }
487   return r - b;
488 }
489 
490 /** Modified Dai-Fletcher QP projector solves the problem:
491  *
492  *      minimise  0.5*x'*x - c'*x
493  *      subj to   a'*x = b
494  *                l \leq x \leq u
495  *
496  *  \param c The point to be projected onto feasible set
497  */
498 #undef __FUNCT__
499 #define __FUNCT__ "project"
500 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
501 {
502   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
503   PetscReal      r, rl, ru, s;
504   PetscInt       innerIter;
505   PetscBool      nonNegativeSlack = PETSC_FALSE;
506   PetscErrorCode ierr;
507 
508   *lam_ext = 0;
509   lambda  = 0;
510   dlambda = 0.5;
511   innerIter = 1;
512 
513   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
514    *
515    *  Optimality conditions for \phi:
516    *
517    *  1. lambda   <= 0
518    *  2. r        <= 0
519    *  3. r*lambda == 0
520    */
521 
522   /* Bracketing Phase */
523   r = phi(x, n, lambda, a, b, c, l, u);
524 
525   if(nonNegativeSlack) {
526     /* inequality constraint, i.e., with \xi >= 0 constraint */
527     if (r < TOL_R) return 0;
528   } else  {
529     /* equality constraint ,i.e., without \xi >= 0 constraint */
530     if (fabs(r) < TOL_R) return 0;
531   }
532 
533   if (r < 0.0){
534     lambdal = lambda;
535     rl      = r;
536     lambda  = lambda + dlambda;
537     r       = phi(x, n, lambda, a, b, c, l, u);
538     while (r < 0.0 && dlambda < BMRM_INFTY)  {
539       lambdal = lambda;
540       s       = rl/r - 1.0;
541       if (s < 0.1) s = 0.1;
542       dlambda = dlambda + dlambda/s;
543       lambda  = lambda + dlambda;
544       rl      = r;
545       r       = phi(x, n, lambda, a, b, c, l, u);
546     }
547     lambdau = lambda;
548     ru      = r;
549   } else {
550     lambdau = lambda;
551     ru      = r;
552     lambda  = lambda - dlambda;
553     r       = phi(x, n, lambda, a, b, c, l, u);
554     while (r > 0.0 && dlambda > -BMRM_INFTY) {
555       lambdau = lambda;
556       s       = ru/r - 1.0;
557       if (s < 0.1) s = 0.1;
558       dlambda = dlambda + dlambda/s;
559       lambda  = lambda - dlambda;
560       ru      = r;
561       r       = phi(x, n, lambda, a, b, c, l, u);
562     }
563     lambdal = lambda;
564     rl      = r;
565   }
566 
567   if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
568 
569   if(ru == 0){
570     lambda = lambdau;
571     r = phi(x, n, lambda, a, b, c, l, u);
572     return innerIter;
573   }
574 
575   /* Secant Phase */
576   s       = 1.0 - rl/ru;
577   dlambda = dlambda/s;
578   lambda  = lambdau - dlambda;
579   r       = phi(x, n, lambda, a, b, c, l, u);
580 
581   while (fabs(r) > TOL_R
582          && dlambda > TOL_LAM * (1.0 + fabs(lambda))
583          && innerIter < df->maxProjIter){
584     innerIter++;
585     if (r > 0.0){
586       if (s <= 2.0){
587         lambdau = lambda;
588         ru      = r;
589         s       = 1.0 - rl/ru;
590         dlambda = (lambdau - lambdal) / s;
591         lambda  = lambdau - dlambda;
592       } else {
593         s          = ru/r-1.0;
594         if (s < 0.1) s = 0.1;
595         dlambda    = (lambdau - lambda) / s;
596         lambda_new = 0.75*lambdal + 0.25*lambda;
597         if (lambda_new < (lambda - dlambda))
598           lambda_new = lambda - dlambda;
599         lambdau    = lambda;
600         ru         = r;
601         lambda     = lambda_new;
602         s          = (lambdau - lambdal) / (lambdau - lambda);
603       }
604     } else {
605       if (s >= 2.0){
606         lambdal = lambda;
607         rl      = r;
608         s       = 1.0 - rl/ru;
609         dlambda = (lambdau - lambdal) / s;
610         lambda  = lambdau - dlambda;
611       } else {
612         s          = rl/r - 1.0;
613         if (s < 0.1) s = 0.1;
614         dlambda    = (lambda-lambdal) / s;
615         lambda_new = 0.75*lambdau + 0.25*lambda;
616         if (lambda_new > (lambda + dlambda))
617           lambda_new = lambda + dlambda;
618         lambdal    = lambda;
619         rl         = r;
620         lambda     = lambda_new;
621         s          = (lambdau - lambdal) / (lambdau-lambda);
622       }
623     }
624     r = phi(x, n, lambda, a, b, c, l, u);
625   }
626 
627   *lam_ext = lambda;
628   if(innerIter >= df->maxProjIter) {
629     ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
630   }
631   return innerIter;
632 }
633 
634 
635 #undef __FUNCT__
636 #define __FUNCT__ "solve"
637 PetscErrorCode solve(TAO_DF *df)
638 {
639   PetscErrorCode ierr;
640   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
641   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
642   PetscReal      DELTAsv, ProdDELTAsv;
643   PetscReal      c, *tempQ;
644   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
645   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
646   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
647   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
648   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
649 
650   /*** variables for the adaptive nonmonotone linesearch ***/
651   PetscInt    L, llast;
652   PetscReal   fr, fbest, fv, fc, fv0;
653 
654   c = BMRM_INFTY;
655 
656   DELTAsv = EPS_SV;
657   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
658   else  ProdDELTAsv = EPS_SV;
659 
660   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
661 
662   lam_ext = 0.0;
663 
664   /* Project the initial solution */
665   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
666 
667   /* Compute gradient
668      g = Q*x + f; */
669 
670   it = 0;
671   for (i = 0; i < dim; i++) {
672     if (fabs(x[i]) > ProdDELTAsv) ipt[it++] = i;
673   }
674 
675   ierr = PetscMemzero(t, dim*sizeof(PetscReal)); CHKERRQ(ierr);
676   for (i = 0; i < it; i++){
677     tempQ = Q[ipt[i]];
678     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
679   }
680   for (i = 0; i < dim; i++){
681     g[i] = t[i] + f[i];
682   }
683 
684 
685   /* y = -(x_{k} - g_{k}) */
686   for (i = 0; i < dim; i++){
687     y[i] = g[i] - x[i];
688   }
689 
690   /* Project x_{k} - g_{k} */
691   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
692 
693   /* y = P(x_{k} - g_{k}) - x_{k} */
694   max = ALPHA_MIN;
695   for (i = 0; i < dim; i++){
696     y[i] = tempv[i] - x[i];
697     if (fabs(y[i]) > max) max = fabs(y[i]);
698   }
699 
700   if (max < tol*1e-3){
701     lscount = 0;
702     innerIter    = 0;
703     return 0;
704   }
705 
706   alpha = 1.0 / max;
707 
708   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
709   fv0   = 0.0;
710   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
711 
712   /*** adaptive nonmonotone linesearch ***/
713   L     = 2;
714   fr    = ALPHA_MAX;
715   fbest = fv0;
716   fc    = fv0;
717   llast = 0;
718   akold = bkold = 0.0;
719 
720   /***      Iterator begins     ***/
721   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
722 
723     /* tempv = -(x_{k} - alpha*g_{k}) */
724     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
725 
726     /* Project x_{k} - alpha*g_{k} */
727     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
728 
729 
730     /* gd = \inner{d_{k}}{g_{k}}
731         d = P(x_{k} - alpha*g_{k}) - x_{k}
732     */
733     gd = 0.0;
734     for (i = 0; i < dim; i++){
735       d[i] = y[i] - x[i];
736       gd  += d[i] * g[i];
737     }
738 
739     /* Gradient computation  */
740 
741     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
742 
743     it = it2 = 0;
744     for (i = 0; i < dim; i++){
745       if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
746     }
747     for (i = 0; i < dim; i++) {
748       if (fabs(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
749     }
750 
751     ierr = PetscMemzero(Qd, dim*sizeof(PetscReal)); CHKERRQ(ierr);
752     /* compute Qd = Q*d */
753     if (it < it2){
754       for (i = 0; i < it; i++){
755         tempQ = Q[ipt[i]];
756         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
757       }
758     } else { /* compute Qd = Q*y-t */
759       for (i = 0; i < it2; i++){
760         tempQ = Q[ipt2[i]];
761         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
762       }
763       for (j = 0; j < dim; j++) Qd[j] -= t[j];
764     }
765 
766     /* ak = inner{d_{k}}{d_{k}} */
767     ak = 0.0;
768     for (i = 0; i < dim; i++) ak += d[i] * d[i];
769 
770     bk = 0.0;
771     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
772 
773     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
774     else lamnew = 1.0;
775 
776     /* fv is computing f(x_{k} + d_{k}) */
777     fv = 0.0;
778     for (i = 0; i < dim; i++){
779       xplus[i] = x[i] + d[i];
780       tplus[i] = t[i] + Qd[i];
781       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
782     }
783 
784     /* fr is fref */
785     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
786       lscount++;
787       fv = 0.0;
788       for (i = 0; i < dim; i++){
789         xplus[i] = x[i] + lamnew*d[i];
790         tplus[i] = t[i] + lamnew*Qd[i];
791         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
792       }
793     }
794 
795     for (i = 0; i < dim; i++){
796       sk[i] = xplus[i] - x[i];
797       yk[i] = tplus[i] - t[i];
798       x[i]  = xplus[i];
799       t[i]  = tplus[i];
800       g[i]  = t[i] + f[i];
801     }
802 
803     /* update the line search control parameters */
804     if (fv < fbest){
805       fbest = fv;
806       fc    = fv;
807       llast = 0;
808     } else {
809       fc = (fc > fv ? fc : fv);
810       llast++;
811       if (llast == L){
812         fr    = fc;
813         fc    = fv;
814         llast = 0;
815       }
816     }
817 
818     ak = bk = 0.0;
819     for (i = 0; i < dim; i++){
820       ak += sk[i] * sk[i];
821       bk += sk[i] * yk[i];
822     }
823 
824     if (bk <= EPS*ak) alpha = ALPHA_MAX;
825     else {
826       if (bkold < EPS*akold) alpha = ak/bk;
827       else alpha = (akold+ak)/(bkold+bk);
828 
829       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
830       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
831     }
832 
833     akold = ak;
834     bkold = bk;
835 
836     /*** stopping criterion based on KKT conditions ***/
837     /* at optimal, gradient of lagrangian w.r.t. x is zero */
838 
839     bk = 0.0;
840     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
841 
842     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
843       it     = 0;
844       luv    = 0;
845       kktlam = 0.0;
846       for (i = 0; i < dim; i++){
847         /* x[i] is active hence lagrange multipliers for box constraints
848                 are zero. The lagrange multiplier for ineq. const. is then
849                 defined as below
850         */
851         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
852           ipt[it++] = i;
853           kktlam    = kktlam - a[i]*g[i];
854         } else  uv[luv++] = i;
855       }
856 
857       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
858       else {
859         kktlam = kktlam/it;
860         info   = 1;
861         for (i = 0; i < it; i++) {
862           if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
863             info = 0;
864             break;
865           }
866         }
867         if (info == 1)  {
868           for (i = 0; i < luv; i++)  {
869             if (x[uv[i]] <= DELTAsv){
870               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
871                      not be zero. So, the gradient without beta is > 0
872               */
873               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
874                 info = 0;
875                 break;
876               }
877             } else {
878               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
879                      not be zero. So, the gradient without eta is < 0
880               */
881               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
882                 info = 0;
883                 break;
884               }
885             }
886           }
887         }
888 
889         if (info == 1) return 0;
890       }
891     }
892   }
893   return 0;
894 }
895 
896 
897