xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision 503c0ea9b45bcfbcebbb1ea5341243bbc69f0bea)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 /*------------------------------------------------------------*/
11 /* The main solver function
12 
13    f = Remp(W)          This is what the user provides us from the application layer
14    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
15 
16    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
17 */
18 
19 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
20 {
21   PetscFunctionBegin;
22   PetscCall(PetscNew(p));
23   PetscCall(VecDuplicate(X, &(*p)->V));
24   PetscCall(VecCopy(X, (*p)->V));
25   (*p)->next = NULL;
26   PetscFunctionReturn(0);
27 }
28 
29 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
30 {
31   Vec_Chain      *p = head->next, *q;
32 
33   PetscFunctionBegin;
34   while (p) {
35     q = p->next;
36     PetscCall(VecDestroy(&p->V));
37     PetscCall(PetscFree(p));
38     p = q;
39   }
40   head->next = NULL;
41   PetscFunctionReturn(0);
42 }
43 
44 static PetscErrorCode TaoSolve_BMRM(Tao tao)
45 {
46   TAO_DF             df;
47   TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;
48 
49   /* Values and pointers to parts of the optimization problem */
50   PetscReal          f = 0.0;
51   Vec                W = tao->solution;
52   Vec                G = tao->gradient;
53   PetscReal          lambda;
54   PetscReal          bt;
55   Vec_Chain          grad_list, *tail_glist, *pgrad;
56   PetscInt           i;
57   PetscMPIInt        rank;
58 
59   /* Used in converged criteria check */
60   PetscReal          reg;
61   PetscReal          jtwt = 0.0, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
62   PetscReal          innerSolverTol;
63   MPI_Comm           comm;
64 
65   PetscFunctionBegin;
66   PetscCall(PetscObjectGetComm((PetscObject)tao,&comm));
67   PetscCallMPI(MPI_Comm_rank(comm, &rank));
68   lambda = bmrm->lambda;
69 
70   /* Check Stopping Condition */
71   tao->step = 1.0;
72   max_jtwt = -BMRM_INFTY;
73   min_jw = BMRM_INFTY;
74   innerSolverTol = 1.0;
75   epsilon = 0.0;
76 
77   if (rank == 0) {
78     PetscCall(init_df_solver(&df));
79     grad_list.next = NULL;
80     tail_glist = &grad_list;
81   }
82 
83   df.tol = 1e-6;
84   tao->reason = TAO_CONTINUE_ITERATING;
85 
86   /*-----------------Algorithm Begins------------------------*/
87   /* make the scatter */
88   PetscCall(VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w));
89   PetscCall(VecAssemblyBegin(bmrm->local_w));
90   PetscCall(VecAssemblyEnd(bmrm->local_w));
91 
92   /* NOTE: In application pass the sub-gradient of Remp(W) */
93   PetscCall(TaoComputeObjectiveAndGradient(tao, W, &f, G));
94   PetscCall(TaoLogConvergenceHistory(tao,f,1.0,0.0,tao->ksp_its));
95   PetscCall(TaoMonitor(tao,tao->niter,f,1.0,0.0,tao->step));
96   PetscCall((*tao->ops->convergencetest)(tao,tao->cnvP));
97 
98   while (tao->reason == TAO_CONTINUE_ITERATING) {
99     /* Call general purpose update function */
100     if (tao->ops->update) {
101       PetscCall((*tao->ops->update)(tao, tao->niter, tao->user_update));
102     }
103 
104     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
105     PetscCall(VecDot(W, G, &bt));
106     bt = f - bt;
107 
108     /* First gather the gradient to the rank-0 node */
109     PetscCall(VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD));
110     PetscCall(VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD));
111 
112     /* Bring up the inner solver */
113     if (rank == 0) {
114       PetscCall(ensure_df_space(tao->niter+1, &df));
115       PetscCall(make_grad_node(bmrm->local_w, &pgrad));
116       tail_glist->next = pgrad;
117       tail_glist = pgrad;
118 
119       df.a[tao->niter] = 1.0;
120       df.f[tao->niter] = -bt;
121       df.u[tao->niter] = 1.0;
122       df.l[tao->niter] = 0.0;
123 
124       /* set up the Q */
125       pgrad = grad_list.next;
126       for (i=0; i<=tao->niter; i++) {
127         PetscCheck(pgrad,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Assert that there are at least tao->niter+1 pgrad available");
128         PetscCall(VecDot(pgrad->V, bmrm->local_w, &reg));
129         df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
130         pgrad = pgrad->next;
131       }
132 
133       if (tao->niter > 0) {
134         df.x[tao->niter] = 0.0;
135         PetscCall(solve(&df));
136       } else
137         df.x[0] = 1.0;
138 
139       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
140       jtwt = 0.0;
141       PetscCall(VecSet(bmrm->local_w, 0.0));
142       pgrad = grad_list.next;
143       for (i=0; i<=tao->niter; i++) {
144         jtwt -= df.x[i] * df.f[i];
145         PetscCall(VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V));
146         pgrad = pgrad->next;
147       }
148 
149       PetscCall(VecNorm(bmrm->local_w, NORM_2, &reg));
150       reg = 0.5*lambda*reg*reg;
151       jtwt -= reg;
152     } /* end if rank == 0 */
153 
154     /* scatter the new W to all nodes */
155     PetscCall(VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE));
156     PetscCall(VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE));
157 
158     PetscCall(TaoComputeObjectiveAndGradient(tao, W, &f, G));
159 
160     PetscCallMPI(MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm));
161     PetscCallMPI(MPI_Bcast(&reg,1,MPIU_REAL,0,comm));
162 
163     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
164     if (jw < min_jw) min_jw = jw;
165     if (jtwt > max_jtwt) max_jtwt = jtwt;
166 
167     pre_epsilon = epsilon;
168     epsilon = min_jw - jtwt;
169 
170     if (rank == 0) {
171       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
172       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
173 
174       /* if the annealing doesn't work well, lower the inner solver tolerance */
175       if (pre_epsilon < epsilon) innerSolverTol *= 0.2;
176 
177       df.tol = innerSolverTol*0.5;
178     }
179 
180     tao->niter++;
181     PetscCall(TaoLogConvergenceHistory(tao,min_jw,epsilon,0.0,tao->ksp_its));
182     PetscCall(TaoMonitor(tao,tao->niter,min_jw,epsilon,0.0,tao->step));
183     PetscCall((*tao->ops->convergencetest)(tao,tao->cnvP));
184   }
185 
186   /* free all the memory */
187   if (rank == 0) {
188     PetscCall(destroy_grad_list(&grad_list));
189     PetscCall(destroy_df_solver(&df));
190   }
191 
192   PetscCall(VecDestroy(&bmrm->local_w));
193   PetscCall(VecScatterDestroy(&bmrm->scatter));
194   PetscFunctionReturn(0);
195 }
196 
197 /* ---------------------------------------------------------- */
198 
199 static PetscErrorCode TaoSetup_BMRM(Tao tao)
200 {
201   PetscFunctionBegin;
202   /* Allocate some arrays */
203   if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
204   PetscFunctionReturn(0);
205 }
206 
207 /*------------------------------------------------------------*/
208 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
209 {
210   PetscFunctionBegin;
211   PetscCall(PetscFree(tao->data));
212   PetscFunctionReturn(0);
213 }
214 
215 static PetscErrorCode TaoSetFromOptions_BMRM(PetscOptionItems *PetscOptionsObject,Tao tao)
216 {
217   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
218 
219   PetscFunctionBegin;
220   PetscCall(PetscOptionsHead(PetscOptionsObject,"BMRM for regularized risk minimization"));
221   PetscCall(PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,NULL));
222   PetscCall(PetscOptionsTail());
223   PetscFunctionReturn(0);
224 }
225 
226 /*------------------------------------------------------------*/
227 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
228 {
229   PetscBool      isascii;
230 
231   PetscFunctionBegin;
232   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
233   if (isascii) {
234     PetscCall(PetscViewerASCIIPushTab(viewer));
235     PetscCall(PetscViewerASCIIPopTab(viewer));
236   }
237   PetscFunctionReturn(0);
238 }
239 
240 /*------------------------------------------------------------*/
241 /*MC
242   TAOBMRM - bundle method for regularized risk minimization
243 
244   Options Database Keys:
245 . - tao_bmrm_lambda - regulariser weight
246 
247   Level: beginner
248 M*/
249 
250 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
251 {
252   TAO_BMRM       *bmrm;
253 
254   PetscFunctionBegin;
255   tao->ops->setup = TaoSetup_BMRM;
256   tao->ops->solve = TaoSolve_BMRM;
257   tao->ops->view  = TaoView_BMRM;
258   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
259   tao->ops->destroy = TaoDestroy_BMRM;
260 
261   PetscCall(PetscNewLog(tao,&bmrm));
262   bmrm->lambda = 1.0;
263   tao->data = (void*)bmrm;
264 
265   /* Override default settings (unless already changed) */
266   if (!tao->max_it_changed) tao->max_it = 2000;
267   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
268   if (!tao->gatol_changed) tao->gatol = 1.0e-12;
269   if (!tao->grtol_changed) tao->grtol = 1.0e-12;
270 
271   PetscFunctionReturn(0);
272 }
273 
274 PetscErrorCode init_df_solver(TAO_DF *df)
275 {
276   PetscInt       i, n = INCRE_DIM;
277 
278   PetscFunctionBegin;
279   /* default values */
280   df->maxProjIter = 200;
281   df->maxPGMIter = 300000;
282   df->b = 1.0;
283 
284   /* memory space required by Dai-Fletcher */
285   df->cur_num_cp = n;
286   PetscCall(PetscMalloc1(n, &df->f));
287   PetscCall(PetscMalloc1(n, &df->a));
288   PetscCall(PetscMalloc1(n, &df->l));
289   PetscCall(PetscMalloc1(n, &df->u));
290   PetscCall(PetscMalloc1(n, &df->x));
291   PetscCall(PetscMalloc1(n, &df->Q));
292 
293   for (i = 0; i < n; i ++) {
294     PetscCall(PetscMalloc1(n, &df->Q[i]));
295   }
296 
297   PetscCall(PetscMalloc1(n, &df->g));
298   PetscCall(PetscMalloc1(n, &df->y));
299   PetscCall(PetscMalloc1(n, &df->tempv));
300   PetscCall(PetscMalloc1(n, &df->d));
301   PetscCall(PetscMalloc1(n, &df->Qd));
302   PetscCall(PetscMalloc1(n, &df->t));
303   PetscCall(PetscMalloc1(n, &df->xplus));
304   PetscCall(PetscMalloc1(n, &df->tplus));
305   PetscCall(PetscMalloc1(n, &df->sk));
306   PetscCall(PetscMalloc1(n, &df->yk));
307 
308   PetscCall(PetscMalloc1(n, &df->ipt));
309   PetscCall(PetscMalloc1(n, &df->ipt2));
310   PetscCall(PetscMalloc1(n, &df->uv));
311   PetscFunctionReturn(0);
312 }
313 
314 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
315 {
316   PetscReal      *tmp, **tmp_Q;
317   PetscInt       i, n, old_n;
318 
319   PetscFunctionBegin;
320   df->dim = dim;
321   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
322 
323   old_n = df->cur_num_cp;
324   df->cur_num_cp += INCRE_DIM;
325   n = df->cur_num_cp;
326 
327   /* memory space required by dai-fletcher */
328   PetscCall(PetscMalloc1(n, &tmp));
329   PetscCall(PetscArraycpy(tmp, df->f, old_n));
330   PetscCall(PetscFree(df->f));
331   df->f = tmp;
332 
333   PetscCall(PetscMalloc1(n, &tmp));
334   PetscCall(PetscArraycpy(tmp, df->a, old_n));
335   PetscCall(PetscFree(df->a));
336   df->a = tmp;
337 
338   PetscCall(PetscMalloc1(n, &tmp));
339   PetscCall(PetscArraycpy(tmp, df->l, old_n));
340   PetscCall(PetscFree(df->l));
341   df->l = tmp;
342 
343   PetscCall(PetscMalloc1(n, &tmp));
344   PetscCall(PetscArraycpy(tmp, df->u, old_n));
345   PetscCall(PetscFree(df->u));
346   df->u = tmp;
347 
348   PetscCall(PetscMalloc1(n, &tmp));
349   PetscCall(PetscArraycpy(tmp, df->x, old_n));
350   PetscCall(PetscFree(df->x));
351   df->x = tmp;
352 
353   PetscCall(PetscMalloc1(n, &tmp_Q));
354   for (i = 0; i < n; i ++) {
355     PetscCall(PetscMalloc1(n, &tmp_Q[i]));
356     if (i < old_n) {
357       PetscCall(PetscArraycpy(tmp_Q[i], df->Q[i], old_n));
358       PetscCall(PetscFree(df->Q[i]));
359     }
360   }
361 
362   PetscCall(PetscFree(df->Q));
363   df->Q = tmp_Q;
364 
365   PetscCall(PetscFree(df->g));
366   PetscCall(PetscMalloc1(n, &df->g));
367 
368   PetscCall(PetscFree(df->y));
369   PetscCall(PetscMalloc1(n, &df->y));
370 
371   PetscCall(PetscFree(df->tempv));
372   PetscCall(PetscMalloc1(n, &df->tempv));
373 
374   PetscCall(PetscFree(df->d));
375   PetscCall(PetscMalloc1(n, &df->d));
376 
377   PetscCall(PetscFree(df->Qd));
378   PetscCall(PetscMalloc1(n, &df->Qd));
379 
380   PetscCall(PetscFree(df->t));
381   PetscCall(PetscMalloc1(n, &df->t));
382 
383   PetscCall(PetscFree(df->xplus));
384   PetscCall(PetscMalloc1(n, &df->xplus));
385 
386   PetscCall(PetscFree(df->tplus));
387   PetscCall(PetscMalloc1(n, &df->tplus));
388 
389   PetscCall(PetscFree(df->sk));
390   PetscCall(PetscMalloc1(n, &df->sk));
391 
392   PetscCall(PetscFree(df->yk));
393   PetscCall(PetscMalloc1(n, &df->yk));
394 
395   PetscCall(PetscFree(df->ipt));
396   PetscCall(PetscMalloc1(n, &df->ipt));
397 
398   PetscCall(PetscFree(df->ipt2));
399   PetscCall(PetscMalloc1(n, &df->ipt2));
400 
401   PetscCall(PetscFree(df->uv));
402   PetscCall(PetscMalloc1(n, &df->uv));
403   PetscFunctionReturn(0);
404 }
405 
406 PetscErrorCode destroy_df_solver(TAO_DF *df)
407 {
408   PetscInt       i;
409 
410   PetscFunctionBegin;
411   PetscCall(PetscFree(df->f));
412   PetscCall(PetscFree(df->a));
413   PetscCall(PetscFree(df->l));
414   PetscCall(PetscFree(df->u));
415   PetscCall(PetscFree(df->x));
416 
417   for (i = 0; i < df->cur_num_cp; i ++) {
418     PetscCall(PetscFree(df->Q[i]));
419   }
420   PetscCall(PetscFree(df->Q));
421   PetscCall(PetscFree(df->ipt));
422   PetscCall(PetscFree(df->ipt2));
423   PetscCall(PetscFree(df->uv));
424   PetscCall(PetscFree(df->g));
425   PetscCall(PetscFree(df->y));
426   PetscCall(PetscFree(df->tempv));
427   PetscCall(PetscFree(df->d));
428   PetscCall(PetscFree(df->Qd));
429   PetscCall(PetscFree(df->t));
430   PetscCall(PetscFree(df->xplus));
431   PetscCall(PetscFree(df->tplus));
432   PetscCall(PetscFree(df->sk));
433   PetscCall(PetscFree(df->yk));
434   PetscFunctionReturn(0);
435 }
436 
437 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
438 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
439 {
440   PetscReal r = 0.0;
441   PetscInt  i;
442 
443   for (i = 0; i < n; i++) {
444     x[i] = -c[i] + lambda*a[i];
445     if (x[i] > u[i])     x[i] = u[i];
446     else if (x[i] < l[i]) x[i] = l[i];
447     r += a[i]*x[i];
448   }
449   return r - b;
450 }
451 
452 /** Modified Dai-Fletcher QP projector solves the problem:
453  *
454  *      minimise  0.5*x'*x - c'*x
455  *      subj to   a'*x = b
456  *                l \leq x \leq u
457  *
458  *  \param c The point to be projected onto feasible set
459  */
460 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
461 {
462   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
463   PetscReal      r, rl, ru, s;
464   PetscInt       innerIter;
465   PetscBool      nonNegativeSlack = PETSC_FALSE;
466 
467   *lam_ext = 0;
468   lambda  = 0;
469   dlambda = 0.5;
470   innerIter = 1;
471 
472   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
473    *
474    *  Optimality conditions for \phi:
475    *
476    *  1. lambda   <= 0
477    *  2. r        <= 0
478    *  3. r*lambda == 0
479    */
480 
481   /* Bracketing Phase */
482   r = phi(x, n, lambda, a, b, c, l, u);
483 
484   if (nonNegativeSlack) {
485     /* inequality constraint, i.e., with \xi >= 0 constraint */
486     if (r < TOL_R) return 0;
487   } else  {
488     /* equality constraint ,i.e., without \xi >= 0 constraint */
489     if (PetscAbsReal(r) < TOL_R) return 0;
490   }
491 
492   if (r < 0.0) {
493     lambdal = lambda;
494     rl      = r;
495     lambda  = lambda + dlambda;
496     r       = phi(x, n, lambda, a, b, c, l, u);
497     while (r < 0.0 && dlambda < BMRM_INFTY)  {
498       lambdal = lambda;
499       s       = rl/r - 1.0;
500       if (s < 0.1) s = 0.1;
501       dlambda = dlambda + dlambda/s;
502       lambda  = lambda + dlambda;
503       rl      = r;
504       r       = phi(x, n, lambda, a, b, c, l, u);
505     }
506     lambdau = lambda;
507     ru      = r;
508   } else {
509     lambdau = lambda;
510     ru      = r;
511     lambda  = lambda - dlambda;
512     r       = phi(x, n, lambda, a, b, c, l, u);
513     while (r > 0.0 && dlambda > -BMRM_INFTY) {
514       lambdau = lambda;
515       s       = ru/r - 1.0;
516       if (s < 0.1) s = 0.1;
517       dlambda = dlambda + dlambda/s;
518       lambda  = lambda - dlambda;
519       ru      = r;
520       r       = phi(x, n, lambda, a, b, c, l, u);
521     }
522     lambdal = lambda;
523     rl      = r;
524   }
525 
526   PetscCheck(PetscAbsReal(dlambda) <= BMRM_INFTY,PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
527 
528   if (ru == 0) {
529     return innerIter;
530   }
531 
532   /* Secant Phase */
533   s       = 1.0 - rl/ru;
534   dlambda = dlambda/s;
535   lambda  = lambdau - dlambda;
536   r       = phi(x, n, lambda, a, b, c, l, u);
537 
538   while (PetscAbsReal(r) > TOL_R
539          && dlambda > TOL_LAM * (1.0 + PetscAbsReal(lambda))
540          && innerIter < df->maxProjIter) {
541     innerIter++;
542     if (r > 0.0) {
543       if (s <= 2.0) {
544         lambdau = lambda;
545         ru      = r;
546         s       = 1.0 - rl/ru;
547         dlambda = (lambdau - lambdal) / s;
548         lambda  = lambdau - dlambda;
549       } else {
550         s          = ru/r-1.0;
551         if (s < 0.1) s = 0.1;
552         dlambda    = (lambdau - lambda) / s;
553         lambda_new = 0.75*lambdal + 0.25*lambda;
554         if (lambda_new < (lambda - dlambda))
555           lambda_new = lambda - dlambda;
556         lambdau    = lambda;
557         ru         = r;
558         lambda     = lambda_new;
559         s          = (lambdau - lambdal) / (lambdau - lambda);
560       }
561     } else {
562       if (s >= 2.0) {
563         lambdal = lambda;
564         rl      = r;
565         s       = 1.0 - rl/ru;
566         dlambda = (lambdau - lambdal) / s;
567         lambda  = lambdau - dlambda;
568       } else {
569         s          = rl/r - 1.0;
570         if (s < 0.1) s = 0.1;
571         dlambda    = (lambda-lambdal) / s;
572         lambda_new = 0.75*lambdau + 0.25*lambda;
573         if (lambda_new > (lambda + dlambda))
574           lambda_new = lambda + dlambda;
575         lambdal    = lambda;
576         rl         = r;
577         lambda     = lambda_new;
578         s          = (lambdau - lambdal) / (lambdau-lambda);
579       }
580     }
581     r = phi(x, n, lambda, a, b, c, l, u);
582   }
583 
584   *lam_ext = lambda;
585   if (innerIter >= df->maxProjIter) {
586     PetscCall(PetscInfo(NULL,"WARNING: DaiFletcher max iterations\n"));
587   }
588   return innerIter;
589 }
590 
591 PetscErrorCode solve(TAO_DF *df)
592 {
593   PetscInt       i, j, innerIter, it, it2, luv, info, lscount = 0;
594   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
595   PetscReal      DELTAsv, ProdDELTAsv;
596   PetscReal      c, *tempQ;
597   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
598   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
599   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
600   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
601   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
602 
603   /* variables for the adaptive nonmonotone linesearch */
604   PetscInt    L, llast;
605   PetscReal   fr, fbest, fv, fc, fv0;
606 
607   c = BMRM_INFTY;
608 
609   DELTAsv = EPS_SV;
610   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
611   else  ProdDELTAsv = EPS_SV;
612 
613   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
614 
615   lam_ext = 0.0;
616 
617   /* Project the initial solution */
618   project(dim, a, b, tempv, l, u, x, &lam_ext, df);
619 
620   /* Compute gradient
621      g = Q*x + f; */
622 
623   it = 0;
624   for (i = 0; i < dim; i++) {
625     if (PetscAbsReal(x[i]) > ProdDELTAsv) ipt[it++] = i;
626   }
627 
628   PetscCall(PetscArrayzero(t, dim));
629   for (i = 0; i < it; i++) {
630     tempQ = Q[ipt[i]];
631     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
632   }
633   for (i = 0; i < dim; i++) {
634     g[i] = t[i] + f[i];
635   }
636 
637   /* y = -(x_{k} - g_{k}) */
638   for (i = 0; i < dim; i++) {
639     y[i] = g[i] - x[i];
640   }
641 
642   /* Project x_{k} - g_{k} */
643   project(dim, a, b, y, l, u, tempv, &lam_ext, df);
644 
645   /* y = P(x_{k} - g_{k}) - x_{k} */
646   max = ALPHA_MIN;
647   for (i = 0; i < dim; i++) {
648     y[i] = tempv[i] - x[i];
649     if (PetscAbsReal(y[i]) > max) max = PetscAbsReal(y[i]);
650   }
651 
652   if (max < tol*1e-3) {
653     return 0;
654   }
655 
656   alpha = 1.0 / max;
657 
658   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
659   fv0   = 0.0;
660   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
661 
662   /* adaptive nonmonotone linesearch */
663   L     = 2;
664   fr    = ALPHA_MAX;
665   fbest = fv0;
666   fc    = fv0;
667   llast = 0;
668   akold = bkold = 0.0;
669 
670   /*     Iterator begins     */
671   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
672 
673     /* tempv = -(x_{k} - alpha*g_{k}) */
674     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
675 
676     /* Project x_{k} - alpha*g_{k} */
677     project(dim, a, b, tempv, l, u, y, &lam_ext, df);
678 
679     /* gd = \inner{d_{k}}{g_{k}}
680         d = P(x_{k} - alpha*g_{k}) - x_{k}
681     */
682     gd = 0.0;
683     for (i = 0; i < dim; i++) {
684       d[i] = y[i] - x[i];
685       gd  += d[i] * g[i];
686     }
687 
688     /* Gradient computation  */
689 
690     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
691 
692     it = it2 = 0;
693     for (i = 0; i < dim; i++) {
694       if (PetscAbsReal(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
695     }
696     for (i = 0; i < dim; i++) {
697       if (PetscAbsReal(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
698     }
699 
700     PetscCall(PetscArrayzero(Qd, dim));
701     /* compute Qd = Q*d */
702     if (it < it2) {
703       for (i = 0; i < it; i++) {
704         tempQ = Q[ipt[i]];
705         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
706       }
707     } else { /* compute Qd = Q*y-t */
708       for (i = 0; i < it2; i++) {
709         tempQ = Q[ipt2[i]];
710         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
711       }
712       for (j = 0; j < dim; j++) Qd[j] -= t[j];
713     }
714 
715     /* ak = inner{d_{k}}{d_{k}} */
716     ak = 0.0;
717     for (i = 0; i < dim; i++) ak += d[i] * d[i];
718 
719     bk = 0.0;
720     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
721 
722     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
723     else lamnew = 1.0;
724 
725     /* fv is computing f(x_{k} + d_{k}) */
726     fv = 0.0;
727     for (i = 0; i < dim; i++) {
728       xplus[i] = x[i] + d[i];
729       tplus[i] = t[i] + Qd[i];
730       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
731     }
732 
733     /* fr is fref */
734     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)) {
735       lscount++;
736       fv = 0.0;
737       for (i = 0; i < dim; i++) {
738         xplus[i] = x[i] + lamnew*d[i];
739         tplus[i] = t[i] + lamnew*Qd[i];
740         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
741       }
742     }
743 
744     for (i = 0; i < dim; i++) {
745       sk[i] = xplus[i] - x[i];
746       yk[i] = tplus[i] - t[i];
747       x[i]  = xplus[i];
748       t[i]  = tplus[i];
749       g[i]  = t[i] + f[i];
750     }
751 
752     /* update the line search control parameters */
753     if (fv < fbest) {
754       fbest = fv;
755       fc    = fv;
756       llast = 0;
757     } else {
758       fc = (fc > fv ? fc : fv);
759       llast++;
760       if (llast == L) {
761         fr    = fc;
762         fc    = fv;
763         llast = 0;
764       }
765     }
766 
767     ak = bk = 0.0;
768     for (i = 0; i < dim; i++) {
769       ak += sk[i] * sk[i];
770       bk += sk[i] * yk[i];
771     }
772 
773     if (bk <= EPS*ak) alpha = ALPHA_MAX;
774     else {
775       if (bkold < EPS*akold) alpha = ak/bk;
776       else alpha = (akold+ak)/(bkold+bk);
777 
778       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
779       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
780     }
781 
782     akold = ak;
783     bkold = bk;
784 
785     /* stopping criterion based on KKT conditions */
786     /* at optimal, gradient of lagrangian w.r.t. x is zero */
787 
788     bk = 0.0;
789     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
790 
791     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)) {
792       it     = 0;
793       luv    = 0;
794       kktlam = 0.0;
795       for (i = 0; i < dim; i++) {
796         /* x[i] is active hence lagrange multipliers for box constraints
797                 are zero. The lagrange multiplier for ineq. const. is then
798                 defined as below
799         */
800         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)) {
801           ipt[it++] = i;
802           kktlam    = kktlam - a[i]*g[i];
803         } else  uv[luv++] = i;
804       }
805 
806       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
807       else {
808         kktlam = kktlam/it;
809         info   = 1;
810         for (i = 0; i < it; i++) {
811           if (PetscAbsReal(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
812             info = 0;
813             break;
814           }
815         }
816         if (info == 1)  {
817           for (i = 0; i < luv; i++)  {
818             if (x[uv[i]] <= DELTAsv) {
819               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
820                      not be zero. So, the gradient without beta is > 0
821               */
822               if (g[uv[i]] + kktlam*a[uv[i]] < -tol) {
823                 info = 0;
824                 break;
825               }
826             } else {
827               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
828                      not be zero. So, the gradient without eta is < 0
829               */
830               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
831                 info = 0;
832                 break;
833               }
834             }
835           }
836         }
837 
838         if (info == 1) return 0;
839       }
840     }
841   }
842   return 0;
843 }
844