xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision a3fa217bfefef3d54995bb82b2b5ea8a3c3e9019)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 
11 /*------------------------------------------------------------*/
12 /* The main solver function
13 
14    f = Remp(W)          This is what the user provides us from the application layer
15    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
16 
17    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
18 */
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "make_grad_node"
22 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   ierr = PetscNew(p);CHKERRQ(ierr);
28   ierr = VecDuplicate(X, &(*p)->V);CHKERRQ(ierr);
29   ierr = VecCopy(X, (*p)->V);CHKERRQ(ierr);
30   (*p)->next = NULL;
31   PetscFunctionReturn(0);
32 }
33 
34 #undef __FUNCT__
35 #define __FUNCT__ "destroy_grad_list"
36 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
37 {
38   PetscErrorCode ierr;
39   Vec_Chain      *p = head->next, *q;
40 
41   PetscFunctionBegin;
42   while(p) {
43     q = p->next;
44     ierr = VecDestroy(&p->V);CHKERRQ(ierr);
45     ierr = PetscFree(p);CHKERRQ(ierr);
46     p = q;
47   }
48   head->next = NULL;
49   PetscFunctionReturn(0);
50 }
51 
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "TaoSolve_BMRM"
55 static PetscErrorCode TaoSolve_BMRM(Tao tao)
56 {
57   PetscErrorCode     ierr;
58   TaoConvergedReason reason;
59   TAO_DF             df;
60   TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;
61 
62   /* Values and pointers to parts of the optimization problem */
63   PetscReal          f = 0.0;
64   Vec                W = tao->solution;
65   Vec                G = tao->gradient;
66   PetscReal          lambda;
67   PetscReal          bt;
68   Vec_Chain          grad_list, *tail_glist, *pgrad;
69   PetscInt           iter = 0;
70   PetscInt           i;
71   PetscMPIInt        rank;
72 
73   /* Used in converged criteria check */
74   PetscReal          reg;
75   PetscReal          jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
76   PetscReal          innerSolverTol;
77   MPI_Comm           comm;
78 
79   PetscFunctionBegin;
80   ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
81   ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
82   lambda = bmrm->lambda;
83 
84   /* Check Stopping Condition */
85   tao->step = 1.0;
86   max_jtwt = -BMRM_INFTY;
87   min_jw = BMRM_INFTY;
88   innerSolverTol = 1.0;
89   epsilon = 0.0;
90 
91   if (!rank) {
92     ierr = init_df_solver(&df);CHKERRQ(ierr);
93     grad_list.next = NULL;
94     tail_glist = &grad_list;
95   }
96 
97   df.tol = 1e-6;
98   reason = TAO_CONTINUE_ITERATING;
99 
100   /*-----------------Algorithm Begins------------------------*/
101   /* make the scatter */
102   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
103   ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
104   ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);
105 
106   /* NOTE: In application pass the sub-gradient of Remp(W) */
107   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
108   ierr = TaoMonitor(tao,iter,f,1.0,0.0,tao->step,&reason);CHKERRQ(ierr);
109   while (reason == TAO_CONTINUE_ITERATING) {
110     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
111     ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
112     bt = f - bt;
113 
114     /* First gather the gradient to the master node */
115     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
116     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
117 
118     /* Bring up the inner solver */
119     if (!rank) {
120       ierr = ensure_df_space(iter+1, &df); CHKERRQ(ierr);
121       ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
122       tail_glist->next = pgrad;
123       tail_glist = pgrad;
124 
125       df.a[iter] = 1.0;
126       df.f[iter] = -bt;
127       df.u[iter] = 1.0;
128       df.l[iter] = 0.0;
129 
130       /* set up the Q */
131       pgrad = grad_list.next;
132       for (i=0; i<=iter; i++) {
133         ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
134         df.Q[i][iter] = df.Q[iter][i] = reg / lambda;
135         pgrad = pgrad->next;
136       }
137 
138       if (iter > 0) {
139         df.x[iter] = 0.0;
140         ierr = solve(&df); CHKERRQ(ierr);
141       } else
142         df.x[0] = 1.0;
143 
144       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
145       jtwt = 0.0;
146       ierr = VecSet(bmrm->local_w, 0.0); CHKERRQ(ierr);
147       pgrad = grad_list.next;
148       for (i=0; i<=iter; i++) {
149         jtwt -= df.x[i] * df.f[i];
150         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V); CHKERRQ(ierr);
151         pgrad = pgrad->next;
152       }
153 
154       ierr = VecNorm(bmrm->local_w, NORM_2, &reg); CHKERRQ(ierr);
155       reg = 0.5*lambda*reg*reg;
156       jtwt -= reg;
157     } /* end if rank == 0 */
158 
159     /* scatter the new W to all nodes */
160     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
161     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
162 
163     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
164 
165     ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
166     ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
167 
168     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
169     if (jw < min_jw) min_jw = jw;
170     if (jtwt > max_jtwt) max_jtwt = jtwt;
171 
172     pre_epsilon = epsilon;
173     epsilon = min_jw - jtwt;
174 
175     if (!rank) {
176       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
177       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
178 
179       /* if the annealing doesn't work well, lower the inner solver tolerance */
180       if(pre_epsilon < epsilon) innerSolverTol *= 0.2;
181 
182       df.tol = innerSolverTol*0.5;
183     }
184 
185     iter++;
186     ierr = TaoMonitor(tao,iter,min_jw,epsilon,0.0,tao->step,&reason);CHKERRQ(ierr);
187   }
188 
189   /* free all the memory */
190   if (!rank) {
191     ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
192     ierr = destroy_df_solver(&df);CHKERRQ(ierr);
193   }
194 
195   ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
196   ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
197   PetscFunctionReturn(0);
198 }
199 
200 
201 /* ---------------------------------------------------------- */
202 
203 #undef __FUNCT__
204 #define __FUNCT__ "TaoSetup_BMRM"
205 static PetscErrorCode TaoSetup_BMRM(Tao tao)
206 {
207 
208   PetscErrorCode ierr;
209 
210   PetscFunctionBegin;
211   /* Allocate some arrays */
212   if (!tao->gradient) {
213     ierr = VecDuplicate(tao->solution, &tao->gradient);   CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 /*------------------------------------------------------------*/
219 #undef __FUNCT__
220 #define __FUNCT__ "TaoDestroy_BMRM"
221 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
222 {
223   PetscErrorCode ierr;
224 
225   PetscFunctionBegin;
226   ierr = PetscFree(tao->data);CHKERRQ(ierr);
227   PetscFunctionReturn(0);
228 }
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "TaoSetFromOptions_BMRM"
232 static PetscErrorCode TaoSetFromOptions_BMRM(Tao tao)
233 {
234   PetscErrorCode ierr;
235   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
236   PetscBool      flg;
237 
238   PetscFunctionBegin;
239   ierr = PetscOptionsHead("BMRM for regularized risk minimization");CHKERRQ(ierr);
240   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,&flg); CHKERRQ(ierr);
241   ierr = PetscOptionsTail();CHKERRQ(ierr);
242   PetscFunctionReturn(0);
243 }
244 
245 /*------------------------------------------------------------*/
246 #undef __FUNCT__
247 #define __FUNCT__ "TaoView_BMRM"
248 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
249 {
250   PetscBool      isascii;
251   PetscErrorCode ierr;
252 
253   PetscFunctionBegin;
254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
255   if (isascii) {
256     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
257     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
258   }
259   PetscFunctionReturn(0);
260 }
261 
262 /*------------------------------------------------------------*/
263 EXTERN_C_BEGIN
264 #undef __FUNCT__
265 #define __FUNCT__ "TaoCreate_BMRM"
266 PetscErrorCode TaoCreate_BMRM(Tao tao)
267 {
268   TAO_BMRM       *bmrm;
269   PetscErrorCode ierr;
270 
271   PetscFunctionBegin;
272   tao->ops->setup = TaoSetup_BMRM;
273   tao->ops->solve = TaoSolve_BMRM;
274   tao->ops->view  = TaoView_BMRM;
275   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
276   tao->ops->destroy = TaoDestroy_BMRM;
277 
278   ierr = PetscNewLog(tao,&bmrm);CHKERRQ(ierr);
279   bmrm->lambda = 1.0;
280   tao->data = (void*)bmrm;
281 
282   /* Note: May need to be tuned! */
283   tao->max_it = 2048;
284   tao->max_funcs = 300000;
285   tao->fatol = 1e-20;
286   tao->frtol = 1e-25;
287   tao->gatol = 1e-25;
288   tao->grtol = 1e-25;
289 
290   PetscFunctionReturn(0);
291 }
292 EXTERN_C_END
293 
294 #undef __FUNCT__
295 #define __FUNCT__ "init_df_solver"
296 PetscErrorCode init_df_solver(TAO_DF *df)
297 {
298   PetscInt       i, n = INCRE_DIM;
299   PetscErrorCode ierr;
300 
301   PetscFunctionBegin;
302   /* default values */
303   df->maxProjIter = 200;
304   df->maxPGMIter = 300000;
305   df->b = 1.0;
306 
307   /* memory space required by Dai-Fletcher */
308   df->cur_num_cp = n;
309   ierr = PetscMalloc1(n, &df->f); CHKERRQ(ierr);
310   ierr = PetscMalloc1(n, &df->a); CHKERRQ(ierr);
311   ierr = PetscMalloc1(n, &df->l); CHKERRQ(ierr);
312   ierr = PetscMalloc1(n, &df->u); CHKERRQ(ierr);
313   ierr = PetscMalloc1(n, &df->x); CHKERRQ(ierr);
314   ierr = PetscMalloc1(n, &df->Q); CHKERRQ(ierr);
315 
316   for (i = 0; i < n; i ++) {
317     ierr = PetscMalloc1(n, &df->Q[i]); CHKERRQ(ierr);
318   }
319 
320   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
321   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
322   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
323   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
324   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
325   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
326   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
327   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
328   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
329   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
330 
331   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
332   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
333   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "ensure_df_space"
339 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
340 {
341   PetscErrorCode ierr;
342   PetscReal      *tmp, **tmp_Q;
343   PetscInt       i, n, old_n;
344 
345   PetscFunctionBegin;
346   df->dim = dim;
347   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
348 
349   old_n = df->cur_num_cp;
350   df->cur_num_cp += INCRE_DIM;
351   n = df->cur_num_cp;
352 
353   /* memory space required by dai-fletcher */
354   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
355   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
356   ierr = PetscFree(df->f); CHKERRQ(ierr);
357   df->f = tmp;
358 
359   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
360   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
361   ierr = PetscFree(df->a); CHKERRQ(ierr);
362   df->a = tmp;
363 
364   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
365   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
366   ierr = PetscFree(df->l); CHKERRQ(ierr);
367   df->l = tmp;
368 
369   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
370   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
371   ierr = PetscFree(df->u); CHKERRQ(ierr);
372   df->u = tmp;
373 
374   ierr = PetscMalloc1(n, &tmp); CHKERRQ(ierr);
375   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n); CHKERRQ(ierr);
376   ierr = PetscFree(df->x); CHKERRQ(ierr);
377   df->x = tmp;
378 
379   ierr = PetscMalloc1(n, &tmp_Q); CHKERRQ(ierr);
380   for (i = 0; i < n; i ++) {
381     ierr = PetscMalloc1(n, &tmp_Q[i]); CHKERRQ(ierr);
382     if (i < old_n) {
383       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n); CHKERRQ(ierr);
384       ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
385     }
386   }
387 
388   ierr = PetscFree(df->Q); CHKERRQ(ierr);
389   df->Q = tmp_Q;
390 
391   ierr = PetscFree(df->g); CHKERRQ(ierr);
392   ierr = PetscMalloc1(n, &df->g); CHKERRQ(ierr);
393 
394   ierr = PetscFree(df->y); CHKERRQ(ierr);
395   ierr = PetscMalloc1(n, &df->y); CHKERRQ(ierr);
396 
397   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
398   ierr = PetscMalloc1(n, &df->tempv); CHKERRQ(ierr);
399 
400   ierr = PetscFree(df->d); CHKERRQ(ierr);
401   ierr = PetscMalloc1(n, &df->d); CHKERRQ(ierr);
402 
403   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
404   ierr = PetscMalloc1(n, &df->Qd); CHKERRQ(ierr);
405 
406   ierr = PetscFree(df->t); CHKERRQ(ierr);
407   ierr = PetscMalloc1(n, &df->t); CHKERRQ(ierr);
408 
409   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
410   ierr = PetscMalloc1(n, &df->xplus); CHKERRQ(ierr);
411 
412   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
413   ierr = PetscMalloc1(n, &df->tplus); CHKERRQ(ierr);
414 
415   ierr = PetscFree(df->sk); CHKERRQ(ierr);
416   ierr = PetscMalloc1(n, &df->sk); CHKERRQ(ierr);
417 
418   ierr = PetscFree(df->yk); CHKERRQ(ierr);
419   ierr = PetscMalloc1(n, &df->yk); CHKERRQ(ierr);
420 
421   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
422   ierr = PetscMalloc1(n, &df->ipt); CHKERRQ(ierr);
423 
424   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
425   ierr = PetscMalloc1(n, &df->ipt2); CHKERRQ(ierr);
426 
427   ierr = PetscFree(df->uv); CHKERRQ(ierr);
428   ierr = PetscMalloc1(n, &df->uv); CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "destroy_df_solver"
434 PetscErrorCode destroy_df_solver(TAO_DF *df)
435 {
436   PetscErrorCode ierr;
437   PetscInt       i;
438 
439   PetscFunctionBegin;
440   ierr = PetscFree(df->f); CHKERRQ(ierr);
441   ierr = PetscFree(df->a); CHKERRQ(ierr);
442   ierr = PetscFree(df->l); CHKERRQ(ierr);
443   ierr = PetscFree(df->u); CHKERRQ(ierr);
444   ierr = PetscFree(df->x); CHKERRQ(ierr);
445 
446   for (i = 0; i < df->cur_num_cp; i ++) {
447     ierr = PetscFree(df->Q[i]); CHKERRQ(ierr);
448   }
449   ierr = PetscFree(df->Q); CHKERRQ(ierr);
450   ierr = PetscFree(df->ipt); CHKERRQ(ierr);
451   ierr = PetscFree(df->ipt2); CHKERRQ(ierr);
452   ierr = PetscFree(df->uv); CHKERRQ(ierr);
453   ierr = PetscFree(df->g); CHKERRQ(ierr);
454   ierr = PetscFree(df->y); CHKERRQ(ierr);
455   ierr = PetscFree(df->tempv); CHKERRQ(ierr);
456   ierr = PetscFree(df->d); CHKERRQ(ierr);
457   ierr = PetscFree(df->Qd); CHKERRQ(ierr);
458   ierr = PetscFree(df->t); CHKERRQ(ierr);
459   ierr = PetscFree(df->xplus); CHKERRQ(ierr);
460   ierr = PetscFree(df->tplus); CHKERRQ(ierr);
461   ierr = PetscFree(df->sk); CHKERRQ(ierr);
462   ierr = PetscFree(df->yk); CHKERRQ(ierr);
463   PetscFunctionReturn(0);
464 }
465 
466 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
467 #undef __FUNCT__
468 #define __FUNCT__ "phi"
469 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
470 {
471   PetscReal r = 0.0;
472   PetscInt  i;
473 
474   for (i = 0; i < n; i++){
475     x[i] = -c[i] + lambda*a[i];
476     if (x[i] > u[i])     x[i] = u[i];
477     else if(x[i] < l[i]) x[i] = l[i];
478     r += a[i]*x[i];
479   }
480   return r - b;
481 }
482 
483 /** Modified Dai-Fletcher QP projector solves the problem:
484  *
485  *      minimise  0.5*x'*x - c'*x
486  *      subj to   a'*x = b
487  *                l \leq x \leq u
488  *
489  *  \param c The point to be projected onto feasible set
490  */
491 #undef __FUNCT__
492 #define __FUNCT__ "project"
493 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
494 {
495   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
496   PetscReal      r, rl, ru, s;
497   PetscInt       innerIter;
498   PetscBool      nonNegativeSlack = PETSC_FALSE;
499   PetscErrorCode ierr;
500 
501   *lam_ext = 0;
502   lambda  = 0;
503   dlambda = 0.5;
504   innerIter = 1;
505 
506   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
507    *
508    *  Optimality conditions for \phi:
509    *
510    *  1. lambda   <= 0
511    *  2. r        <= 0
512    *  3. r*lambda == 0
513    */
514 
515   /* Bracketing Phase */
516   r = phi(x, n, lambda, a, b, c, l, u);
517 
518   if(nonNegativeSlack) {
519     /* inequality constraint, i.e., with \xi >= 0 constraint */
520     if (r < TOL_R) return 0;
521   } else  {
522     /* equality constraint ,i.e., without \xi >= 0 constraint */
523     if (fabs(r) < TOL_R) return 0;
524   }
525 
526   if (r < 0.0){
527     lambdal = lambda;
528     rl      = r;
529     lambda  = lambda + dlambda;
530     r       = phi(x, n, lambda, a, b, c, l, u);
531     while (r < 0.0 && dlambda < BMRM_INFTY)  {
532       lambdal = lambda;
533       s       = rl/r - 1.0;
534       if (s < 0.1) s = 0.1;
535       dlambda = dlambda + dlambda/s;
536       lambda  = lambda + dlambda;
537       rl      = r;
538       r       = phi(x, n, lambda, a, b, c, l, u);
539     }
540     lambdau = lambda;
541     ru      = r;
542   } else {
543     lambdau = lambda;
544     ru      = r;
545     lambda  = lambda - dlambda;
546     r       = phi(x, n, lambda, a, b, c, l, u);
547     while (r > 0.0 && dlambda > -BMRM_INFTY) {
548       lambdau = lambda;
549       s       = ru/r - 1.0;
550       if (s < 0.1) s = 0.1;
551       dlambda = dlambda + dlambda/s;
552       lambda  = lambda - dlambda;
553       ru      = r;
554       r       = phi(x, n, lambda, a, b, c, l, u);
555     }
556     lambdal = lambda;
557     rl      = r;
558   }
559 
560   if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
561 
562   if(ru == 0){
563     lambda = lambdau;
564     r = phi(x, n, lambda, a, b, c, l, u);
565     return innerIter;
566   }
567 
568   /* Secant Phase */
569   s       = 1.0 - rl/ru;
570   dlambda = dlambda/s;
571   lambda  = lambdau - dlambda;
572   r       = phi(x, n, lambda, a, b, c, l, u);
573 
574   while (fabs(r) > TOL_R
575          && dlambda > TOL_LAM * (1.0 + fabs(lambda))
576          && innerIter < df->maxProjIter){
577     innerIter++;
578     if (r > 0.0){
579       if (s <= 2.0){
580         lambdau = lambda;
581         ru      = r;
582         s       = 1.0 - rl/ru;
583         dlambda = (lambdau - lambdal) / s;
584         lambda  = lambdau - dlambda;
585       } else {
586         s          = ru/r-1.0;
587         if (s < 0.1) s = 0.1;
588         dlambda    = (lambdau - lambda) / s;
589         lambda_new = 0.75*lambdal + 0.25*lambda;
590         if (lambda_new < (lambda - dlambda))
591           lambda_new = lambda - dlambda;
592         lambdau    = lambda;
593         ru         = r;
594         lambda     = lambda_new;
595         s          = (lambdau - lambdal) / (lambdau - lambda);
596       }
597     } else {
598       if (s >= 2.0){
599         lambdal = lambda;
600         rl      = r;
601         s       = 1.0 - rl/ru;
602         dlambda = (lambdau - lambdal) / s;
603         lambda  = lambdau - dlambda;
604       } else {
605         s          = rl/r - 1.0;
606         if (s < 0.1) s = 0.1;
607         dlambda    = (lambda-lambdal) / s;
608         lambda_new = 0.75*lambdau + 0.25*lambda;
609         if (lambda_new > (lambda + dlambda))
610           lambda_new = lambda + dlambda;
611         lambdal    = lambda;
612         rl         = r;
613         lambda     = lambda_new;
614         s          = (lambdau - lambdal) / (lambdau-lambda);
615       }
616     }
617     r = phi(x, n, lambda, a, b, c, l, u);
618   }
619 
620   *lam_ext = lambda;
621   if(innerIter >= df->maxProjIter) {
622     ierr = PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");CHKERRQ(ierr);
623   }
624   return innerIter;
625 }
626 
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "solve"
630 PetscErrorCode solve(TAO_DF *df)
631 {
632   PetscErrorCode ierr;
633   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
634   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
635   PetscReal      DELTAsv, ProdDELTAsv;
636   PetscReal      c, *tempQ;
637   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
638   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
639   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
640   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
641   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
642 
643   /*** variables for the adaptive nonmonotone linesearch ***/
644   PetscInt    L, llast;
645   PetscReal   fr, fbest, fv, fc, fv0;
646 
647   c = BMRM_INFTY;
648 
649   DELTAsv = EPS_SV;
650   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
651   else  ProdDELTAsv = EPS_SV;
652 
653   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
654 
655   lam_ext = 0.0;
656 
657   /* Project the initial solution */
658   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
659 
660   /* Compute gradient
661      g = Q*x + f; */
662 
663   it = 0;
664   for (i = 0; i < dim; i++) {
665     if (fabs(x[i]) > ProdDELTAsv) ipt[it++] = i;
666   }
667 
668   ierr = PetscMemzero(t, dim*sizeof(PetscReal)); CHKERRQ(ierr);
669   for (i = 0; i < it; i++){
670     tempQ = Q[ipt[i]];
671     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
672   }
673   for (i = 0; i < dim; i++){
674     g[i] = t[i] + f[i];
675   }
676 
677 
678   /* y = -(x_{k} - g_{k}) */
679   for (i = 0; i < dim; i++){
680     y[i] = g[i] - x[i];
681   }
682 
683   /* Project x_{k} - g_{k} */
684   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
685 
686   /* y = P(x_{k} - g_{k}) - x_{k} */
687   max = ALPHA_MIN;
688   for (i = 0; i < dim; i++){
689     y[i] = tempv[i] - x[i];
690     if (fabs(y[i]) > max) max = fabs(y[i]);
691   }
692 
693   if (max < tol*1e-3){
694     lscount = 0;
695     innerIter    = 0;
696     return 0;
697   }
698 
699   alpha = 1.0 / max;
700 
701   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
702   fv0   = 0.0;
703   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
704 
705   /*** adaptive nonmonotone linesearch ***/
706   L     = 2;
707   fr    = ALPHA_MAX;
708   fbest = fv0;
709   fc    = fv0;
710   llast = 0;
711   akold = bkold = 0.0;
712 
713   /***      Iterator begins     ***/
714   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
715 
716     /* tempv = -(x_{k} - alpha*g_{k}) */
717     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
718 
719     /* Project x_{k} - alpha*g_{k} */
720     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
721 
722 
723     /* gd = \inner{d_{k}}{g_{k}}
724         d = P(x_{k} - alpha*g_{k}) - x_{k}
725     */
726     gd = 0.0;
727     for (i = 0; i < dim; i++){
728       d[i] = y[i] - x[i];
729       gd  += d[i] * g[i];
730     }
731 
732     /* Gradient computation  */
733 
734     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
735 
736     it = it2 = 0;
737     for (i = 0; i < dim; i++){
738       if (fabs(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
739     }
740     for (i = 0; i < dim; i++) {
741       if (fabs(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
742     }
743 
744     ierr = PetscMemzero(Qd, dim*sizeof(PetscReal)); CHKERRQ(ierr);
745     /* compute Qd = Q*d */
746     if (it < it2){
747       for (i = 0; i < it; i++){
748         tempQ = Q[ipt[i]];
749         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
750       }
751     } else { /* compute Qd = Q*y-t */
752       for (i = 0; i < it2; i++){
753         tempQ = Q[ipt2[i]];
754         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
755       }
756       for (j = 0; j < dim; j++) Qd[j] -= t[j];
757     }
758 
759     /* ak = inner{d_{k}}{d_{k}} */
760     ak = 0.0;
761     for (i = 0; i < dim; i++) ak += d[i] * d[i];
762 
763     bk = 0.0;
764     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
765 
766     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
767     else lamnew = 1.0;
768 
769     /* fv is computing f(x_{k} + d_{k}) */
770     fv = 0.0;
771     for (i = 0; i < dim; i++){
772       xplus[i] = x[i] + d[i];
773       tplus[i] = t[i] + Qd[i];
774       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
775     }
776 
777     /* fr is fref */
778     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
779       lscount++;
780       fv = 0.0;
781       for (i = 0; i < dim; i++){
782         xplus[i] = x[i] + lamnew*d[i];
783         tplus[i] = t[i] + lamnew*Qd[i];
784         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
785       }
786     }
787 
788     for (i = 0; i < dim; i++){
789       sk[i] = xplus[i] - x[i];
790       yk[i] = tplus[i] - t[i];
791       x[i]  = xplus[i];
792       t[i]  = tplus[i];
793       g[i]  = t[i] + f[i];
794     }
795 
796     /* update the line search control parameters */
797     if (fv < fbest){
798       fbest = fv;
799       fc    = fv;
800       llast = 0;
801     } else {
802       fc = (fc > fv ? fc : fv);
803       llast++;
804       if (llast == L){
805         fr    = fc;
806         fc    = fv;
807         llast = 0;
808       }
809     }
810 
811     ak = bk = 0.0;
812     for (i = 0; i < dim; i++){
813       ak += sk[i] * sk[i];
814       bk += sk[i] * yk[i];
815     }
816 
817     if (bk <= EPS*ak) alpha = ALPHA_MAX;
818     else {
819       if (bkold < EPS*akold) alpha = ak/bk;
820       else alpha = (akold+ak)/(bkold+bk);
821 
822       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
823       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
824     }
825 
826     akold = ak;
827     bkold = bk;
828 
829     /*** stopping criterion based on KKT conditions ***/
830     /* at optimal, gradient of lagrangian w.r.t. x is zero */
831 
832     bk = 0.0;
833     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
834 
835     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)){
836       it     = 0;
837       luv    = 0;
838       kktlam = 0.0;
839       for (i = 0; i < dim; i++){
840         /* x[i] is active hence lagrange multipliers for box constraints
841                 are zero. The lagrange multiplier for ineq. const. is then
842                 defined as below
843         */
844         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
845           ipt[it++] = i;
846           kktlam    = kktlam - a[i]*g[i];
847         } else  uv[luv++] = i;
848       }
849 
850       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
851       else {
852         kktlam = kktlam/it;
853         info   = 1;
854         for (i = 0; i < it; i++) {
855           if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
856             info = 0;
857             break;
858           }
859         }
860         if (info == 1)  {
861           for (i = 0; i < luv; i++)  {
862             if (x[uv[i]] <= DELTAsv){
863               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
864                      not be zero. So, the gradient without beta is > 0
865               */
866               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
867                 info = 0;
868                 break;
869               }
870             } else {
871               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
872                      not be zero. So, the gradient without eta is < 0
873               */
874               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
875                 info = 0;
876                 break;
877               }
878             }
879           }
880         }
881 
882         if (info == 1) return 0;
883       }
884     }
885   }
886   return 0;
887 }
888 
889 
890