xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision fdf6c4e30aafdbc795e4f76379caa977fd5cdf5a)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*);
7 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
8 static PetscErrorCode solve(TAO_DF*);
9 
10 /*------------------------------------------------------------*/
11 /* The main solver function
12 
13    f = Remp(W)          This is what the user provides us from the application layer
14    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
15 
16    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
17 */
18 
19 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
20 {
21   PetscFunctionBegin;
22   PetscCall(PetscNew(p));
23   PetscCall(VecDuplicate(X, &(*p)->V));
24   PetscCall(VecCopy(X, (*p)->V));
25   (*p)->next = NULL;
26   PetscFunctionReturn(0);
27 }
28 
29 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
30 {
31   Vec_Chain      *p = head->next, *q;
32 
33   PetscFunctionBegin;
34   while (p) {
35     q = p->next;
36     PetscCall(VecDestroy(&p->V));
37     PetscCall(PetscFree(p));
38     p = q;
39   }
40   head->next = NULL;
41   PetscFunctionReturn(0);
42 }
43 
44 static PetscErrorCode TaoSolve_BMRM(Tao tao)
45 {
46   TAO_DF             df;
47   TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;
48 
49   /* Values and pointers to parts of the optimization problem */
50   PetscReal          f = 0.0;
51   Vec                W = tao->solution;
52   Vec                G = tao->gradient;
53   PetscReal          lambda;
54   PetscReal          bt;
55   Vec_Chain          grad_list, *tail_glist, *pgrad;
56   PetscInt           i;
57   PetscMPIInt        rank;
58 
59   /* Used in converged criteria check */
60   PetscReal          reg;
61   PetscReal          jtwt = 0.0, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
62   PetscReal          innerSolverTol;
63   MPI_Comm           comm;
64 
65   PetscFunctionBegin;
66   PetscCall(PetscObjectGetComm((PetscObject)tao,&comm));
67   PetscCallMPI(MPI_Comm_rank(comm, &rank));
68   lambda = bmrm->lambda;
69 
70   /* Check Stopping Condition */
71   tao->step = 1.0;
72   max_jtwt = -BMRM_INFTY;
73   min_jw = BMRM_INFTY;
74   innerSolverTol = 1.0;
75   epsilon = 0.0;
76 
77   if (rank == 0) {
78     PetscCall(init_df_solver(&df));
79     grad_list.next = NULL;
80     tail_glist = &grad_list;
81   }
82 
83   df.tol = 1e-6;
84   tao->reason = TAO_CONTINUE_ITERATING;
85 
86   /*-----------------Algorithm Begins------------------------*/
87   /* make the scatter */
88   PetscCall(VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w));
89   PetscCall(VecAssemblyBegin(bmrm->local_w));
90   PetscCall(VecAssemblyEnd(bmrm->local_w));
91 
92   /* NOTE: In application pass the sub-gradient of Remp(W) */
93   PetscCall(TaoComputeObjectiveAndGradient(tao, W, &f, G));
94   PetscCall(TaoLogConvergenceHistory(tao,f,1.0,0.0,tao->ksp_its));
95   PetscCall(TaoMonitor(tao,tao->niter,f,1.0,0.0,tao->step));
96   PetscCall((*tao->ops->convergencetest)(tao,tao->cnvP));
97 
98   while (tao->reason == TAO_CONTINUE_ITERATING) {
99     /* Call general purpose update function */
100     if (tao->ops->update) PetscCall((*tao->ops->update)(tao, tao->niter, tao->user_update));
101 
102     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
103     PetscCall(VecDot(W, G, &bt));
104     bt = f - bt;
105 
106     /* First gather the gradient to the rank-0 node */
107     PetscCall(VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD));
108     PetscCall(VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD));
109 
110     /* Bring up the inner solver */
111     if (rank == 0) {
112       PetscCall(ensure_df_space(tao->niter+1, &df));
113       PetscCall(make_grad_node(bmrm->local_w, &pgrad));
114       tail_glist->next = pgrad;
115       tail_glist = pgrad;
116 
117       df.a[tao->niter] = 1.0;
118       df.f[tao->niter] = -bt;
119       df.u[tao->niter] = 1.0;
120       df.l[tao->niter] = 0.0;
121 
122       /* set up the Q */
123       pgrad = grad_list.next;
124       for (i=0; i<=tao->niter; i++) {
125         PetscCheck(pgrad,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Assert that there are at least tao->niter+1 pgrad available");
126         PetscCall(VecDot(pgrad->V, bmrm->local_w, &reg));
127         df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
128         pgrad = pgrad->next;
129       }
130 
131       if (tao->niter > 0) {
132         df.x[tao->niter] = 0.0;
133         PetscCall(solve(&df));
134       } else
135         df.x[0] = 1.0;
136 
137       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
138       jtwt = 0.0;
139       PetscCall(VecSet(bmrm->local_w, 0.0));
140       pgrad = grad_list.next;
141       for (i=0; i<=tao->niter; i++) {
142         jtwt -= df.x[i] * df.f[i];
143         PetscCall(VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V));
144         pgrad = pgrad->next;
145       }
146 
147       PetscCall(VecNorm(bmrm->local_w, NORM_2, &reg));
148       reg = 0.5*lambda*reg*reg;
149       jtwt -= reg;
150     } /* end if rank == 0 */
151 
152     /* scatter the new W to all nodes */
153     PetscCall(VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE));
154     PetscCall(VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE));
155 
156     PetscCall(TaoComputeObjectiveAndGradient(tao, W, &f, G));
157 
158     PetscCallMPI(MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm));
159     PetscCallMPI(MPI_Bcast(&reg,1,MPIU_REAL,0,comm));
160 
161     jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
162     if (jw < min_jw) min_jw = jw;
163     if (jtwt > max_jtwt) max_jtwt = jtwt;
164 
165     pre_epsilon = epsilon;
166     epsilon = min_jw - jtwt;
167 
168     if (rank == 0) {
169       if (innerSolverTol > epsilon) innerSolverTol = epsilon;
170       else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;
171 
172       /* if the annealing doesn't work well, lower the inner solver tolerance */
173       if (pre_epsilon < epsilon) innerSolverTol *= 0.2;
174 
175       df.tol = innerSolverTol*0.5;
176     }
177 
178     tao->niter++;
179     PetscCall(TaoLogConvergenceHistory(tao,min_jw,epsilon,0.0,tao->ksp_its));
180     PetscCall(TaoMonitor(tao,tao->niter,min_jw,epsilon,0.0,tao->step));
181     PetscCall((*tao->ops->convergencetest)(tao,tao->cnvP));
182   }
183 
184   /* free all the memory */
185   if (rank == 0) {
186     PetscCall(destroy_grad_list(&grad_list));
187     PetscCall(destroy_df_solver(&df));
188   }
189 
190   PetscCall(VecDestroy(&bmrm->local_w));
191   PetscCall(VecScatterDestroy(&bmrm->scatter));
192   PetscFunctionReturn(0);
193 }
194 
195 /* ---------------------------------------------------------- */
196 
197 static PetscErrorCode TaoSetup_BMRM(Tao tao)
198 {
199   PetscFunctionBegin;
200   /* Allocate some arrays */
201   if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
202   PetscFunctionReturn(0);
203 }
204 
205 /*------------------------------------------------------------*/
206 static PetscErrorCode TaoDestroy_BMRM(Tao tao)
207 {
208   PetscFunctionBegin;
209   PetscCall(PetscFree(tao->data));
210   PetscFunctionReturn(0);
211 }
212 
213 static PetscErrorCode TaoSetFromOptions_BMRM(PetscOptionItems *PetscOptionsObject,Tao tao)
214 {
215   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
216 
217   PetscFunctionBegin;
218   PetscOptionsHeadBegin(PetscOptionsObject,"BMRM for regularized risk minimization");
219   PetscCall(PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,NULL));
220   PetscOptionsHeadEnd();
221   PetscFunctionReturn(0);
222 }
223 
224 /*------------------------------------------------------------*/
225 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
226 {
227   PetscBool      isascii;
228 
229   PetscFunctionBegin;
230   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
231   if (isascii) {
232     PetscCall(PetscViewerASCIIPushTab(viewer));
233     PetscCall(PetscViewerASCIIPopTab(viewer));
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 /*------------------------------------------------------------*/
239 /*MC
240   TAOBMRM - bundle method for regularized risk minimization
241 
242   Options Database Keys:
243 . - tao_bmrm_lambda - regulariser weight
244 
245   Level: beginner
246 M*/
247 
248 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
249 {
250   TAO_BMRM       *bmrm;
251 
252   PetscFunctionBegin;
253   tao->ops->setup = TaoSetup_BMRM;
254   tao->ops->solve = TaoSolve_BMRM;
255   tao->ops->view  = TaoView_BMRM;
256   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
257   tao->ops->destroy = TaoDestroy_BMRM;
258 
259   PetscCall(PetscNewLog(tao,&bmrm));
260   bmrm->lambda = 1.0;
261   tao->data = (void*)bmrm;
262 
263   /* Override default settings (unless already changed) */
264   if (!tao->max_it_changed) tao->max_it = 2000;
265   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
266   if (!tao->gatol_changed) tao->gatol = 1.0e-12;
267   if (!tao->grtol_changed) tao->grtol = 1.0e-12;
268 
269   PetscFunctionReturn(0);
270 }
271 
272 PetscErrorCode init_df_solver(TAO_DF *df)
273 {
274   PetscInt       i, n = INCRE_DIM;
275 
276   PetscFunctionBegin;
277   /* default values */
278   df->maxProjIter = 200;
279   df->maxPGMIter = 300000;
280   df->b = 1.0;
281 
282   /* memory space required by Dai-Fletcher */
283   df->cur_num_cp = n;
284   PetscCall(PetscMalloc1(n, &df->f));
285   PetscCall(PetscMalloc1(n, &df->a));
286   PetscCall(PetscMalloc1(n, &df->l));
287   PetscCall(PetscMalloc1(n, &df->u));
288   PetscCall(PetscMalloc1(n, &df->x));
289   PetscCall(PetscMalloc1(n, &df->Q));
290 
291   for (i = 0; i < n; i ++) {
292     PetscCall(PetscMalloc1(n, &df->Q[i]));
293   }
294 
295   PetscCall(PetscMalloc1(n, &df->g));
296   PetscCall(PetscMalloc1(n, &df->y));
297   PetscCall(PetscMalloc1(n, &df->tempv));
298   PetscCall(PetscMalloc1(n, &df->d));
299   PetscCall(PetscMalloc1(n, &df->Qd));
300   PetscCall(PetscMalloc1(n, &df->t));
301   PetscCall(PetscMalloc1(n, &df->xplus));
302   PetscCall(PetscMalloc1(n, &df->tplus));
303   PetscCall(PetscMalloc1(n, &df->sk));
304   PetscCall(PetscMalloc1(n, &df->yk));
305 
306   PetscCall(PetscMalloc1(n, &df->ipt));
307   PetscCall(PetscMalloc1(n, &df->ipt2));
308   PetscCall(PetscMalloc1(n, &df->uv));
309   PetscFunctionReturn(0);
310 }
311 
312 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
313 {
314   PetscReal      *tmp, **tmp_Q;
315   PetscInt       i, n, old_n;
316 
317   PetscFunctionBegin;
318   df->dim = dim;
319   if (dim <= df->cur_num_cp) PetscFunctionReturn(0);
320 
321   old_n = df->cur_num_cp;
322   df->cur_num_cp += INCRE_DIM;
323   n = df->cur_num_cp;
324 
325   /* memory space required by dai-fletcher */
326   PetscCall(PetscMalloc1(n, &tmp));
327   PetscCall(PetscArraycpy(tmp, df->f, old_n));
328   PetscCall(PetscFree(df->f));
329   df->f = tmp;
330 
331   PetscCall(PetscMalloc1(n, &tmp));
332   PetscCall(PetscArraycpy(tmp, df->a, old_n));
333   PetscCall(PetscFree(df->a));
334   df->a = tmp;
335 
336   PetscCall(PetscMalloc1(n, &tmp));
337   PetscCall(PetscArraycpy(tmp, df->l, old_n));
338   PetscCall(PetscFree(df->l));
339   df->l = tmp;
340 
341   PetscCall(PetscMalloc1(n, &tmp));
342   PetscCall(PetscArraycpy(tmp, df->u, old_n));
343   PetscCall(PetscFree(df->u));
344   df->u = tmp;
345 
346   PetscCall(PetscMalloc1(n, &tmp));
347   PetscCall(PetscArraycpy(tmp, df->x, old_n));
348   PetscCall(PetscFree(df->x));
349   df->x = tmp;
350 
351   PetscCall(PetscMalloc1(n, &tmp_Q));
352   for (i = 0; i < n; i ++) {
353     PetscCall(PetscMalloc1(n, &tmp_Q[i]));
354     if (i < old_n) {
355       PetscCall(PetscArraycpy(tmp_Q[i], df->Q[i], old_n));
356       PetscCall(PetscFree(df->Q[i]));
357     }
358   }
359 
360   PetscCall(PetscFree(df->Q));
361   df->Q = tmp_Q;
362 
363   PetscCall(PetscFree(df->g));
364   PetscCall(PetscMalloc1(n, &df->g));
365 
366   PetscCall(PetscFree(df->y));
367   PetscCall(PetscMalloc1(n, &df->y));
368 
369   PetscCall(PetscFree(df->tempv));
370   PetscCall(PetscMalloc1(n, &df->tempv));
371 
372   PetscCall(PetscFree(df->d));
373   PetscCall(PetscMalloc1(n, &df->d));
374 
375   PetscCall(PetscFree(df->Qd));
376   PetscCall(PetscMalloc1(n, &df->Qd));
377 
378   PetscCall(PetscFree(df->t));
379   PetscCall(PetscMalloc1(n, &df->t));
380 
381   PetscCall(PetscFree(df->xplus));
382   PetscCall(PetscMalloc1(n, &df->xplus));
383 
384   PetscCall(PetscFree(df->tplus));
385   PetscCall(PetscMalloc1(n, &df->tplus));
386 
387   PetscCall(PetscFree(df->sk));
388   PetscCall(PetscMalloc1(n, &df->sk));
389 
390   PetscCall(PetscFree(df->yk));
391   PetscCall(PetscMalloc1(n, &df->yk));
392 
393   PetscCall(PetscFree(df->ipt));
394   PetscCall(PetscMalloc1(n, &df->ipt));
395 
396   PetscCall(PetscFree(df->ipt2));
397   PetscCall(PetscMalloc1(n, &df->ipt2));
398 
399   PetscCall(PetscFree(df->uv));
400   PetscCall(PetscMalloc1(n, &df->uv));
401   PetscFunctionReturn(0);
402 }
403 
404 PetscErrorCode destroy_df_solver(TAO_DF *df)
405 {
406   PetscInt       i;
407 
408   PetscFunctionBegin;
409   PetscCall(PetscFree(df->f));
410   PetscCall(PetscFree(df->a));
411   PetscCall(PetscFree(df->l));
412   PetscCall(PetscFree(df->u));
413   PetscCall(PetscFree(df->x));
414 
415   for (i = 0; i < df->cur_num_cp; i ++) {
416     PetscCall(PetscFree(df->Q[i]));
417   }
418   PetscCall(PetscFree(df->Q));
419   PetscCall(PetscFree(df->ipt));
420   PetscCall(PetscFree(df->ipt2));
421   PetscCall(PetscFree(df->uv));
422   PetscCall(PetscFree(df->g));
423   PetscCall(PetscFree(df->y));
424   PetscCall(PetscFree(df->tempv));
425   PetscCall(PetscFree(df->d));
426   PetscCall(PetscFree(df->Qd));
427   PetscCall(PetscFree(df->t));
428   PetscCall(PetscFree(df->xplus));
429   PetscCall(PetscFree(df->tplus));
430   PetscCall(PetscFree(df->sk));
431   PetscCall(PetscFree(df->yk));
432   PetscFunctionReturn(0);
433 }
434 
435 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
436 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
437 {
438   PetscReal r = 0.0;
439   PetscInt  i;
440 
441   for (i = 0; i < n; i++) {
442     x[i] = -c[i] + lambda*a[i];
443     if (x[i] > u[i])     x[i] = u[i];
444     else if (x[i] < l[i]) x[i] = l[i];
445     r += a[i]*x[i];
446   }
447   return r - b;
448 }
449 
450 /** Modified Dai-Fletcher QP projector solves the problem:
451  *
452  *      minimise  0.5*x'*x - c'*x
453  *      subj to   a'*x = b
454  *                l \leq x \leq u
455  *
456  *  \param c The point to be projected onto feasible set
457  */
458 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
459 {
460   PetscReal      lambda, lambdal, lambdau, dlambda, lambda_new;
461   PetscReal      r, rl, ru, s;
462   PetscInt       innerIter;
463   PetscBool      nonNegativeSlack = PETSC_FALSE;
464 
465   *lam_ext = 0;
466   lambda  = 0;
467   dlambda = 0.5;
468   innerIter = 1;
469 
470   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
471    *
472    *  Optimality conditions for \phi:
473    *
474    *  1. lambda   <= 0
475    *  2. r        <= 0
476    *  3. r*lambda == 0
477    */
478 
479   /* Bracketing Phase */
480   r = phi(x, n, lambda, a, b, c, l, u);
481 
482   if (nonNegativeSlack) {
483     /* inequality constraint, i.e., with \xi >= 0 constraint */
484     if (r < TOL_R) return 0;
485   } else  {
486     /* equality constraint ,i.e., without \xi >= 0 constraint */
487     if (PetscAbsReal(r) < TOL_R) return 0;
488   }
489 
490   if (r < 0.0) {
491     lambdal = lambda;
492     rl      = r;
493     lambda  = lambda + dlambda;
494     r       = phi(x, n, lambda, a, b, c, l, u);
495     while (r < 0.0 && dlambda < BMRM_INFTY)  {
496       lambdal = lambda;
497       s       = rl/r - 1.0;
498       if (s < 0.1) s = 0.1;
499       dlambda = dlambda + dlambda/s;
500       lambda  = lambda + dlambda;
501       rl      = r;
502       r       = phi(x, n, lambda, a, b, c, l, u);
503     }
504     lambdau = lambda;
505     ru      = r;
506   } else {
507     lambdau = lambda;
508     ru      = r;
509     lambda  = lambda - dlambda;
510     r       = phi(x, n, lambda, a, b, c, l, u);
511     while (r > 0.0 && dlambda > -BMRM_INFTY) {
512       lambdau = lambda;
513       s       = ru/r - 1.0;
514       if (s < 0.1) s = 0.1;
515       dlambda = dlambda + dlambda/s;
516       lambda  = lambda - dlambda;
517       ru      = r;
518       r       = phi(x, n, lambda, a, b, c, l, u);
519     }
520     lambdal = lambda;
521     rl      = r;
522   }
523 
524   PetscCheck(PetscAbsReal(dlambda) <= BMRM_INFTY,PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
525 
526   if (ru == 0) {
527     return innerIter;
528   }
529 
530   /* Secant Phase */
531   s       = 1.0 - rl/ru;
532   dlambda = dlambda/s;
533   lambda  = lambdau - dlambda;
534   r       = phi(x, n, lambda, a, b, c, l, u);
535 
536   while (PetscAbsReal(r) > TOL_R
537          && dlambda > TOL_LAM * (1.0 + PetscAbsReal(lambda))
538          && innerIter < df->maxProjIter) {
539     innerIter++;
540     if (r > 0.0) {
541       if (s <= 2.0) {
542         lambdau = lambda;
543         ru      = r;
544         s       = 1.0 - rl/ru;
545         dlambda = (lambdau - lambdal) / s;
546         lambda  = lambdau - dlambda;
547       } else {
548         s          = ru/r-1.0;
549         if (s < 0.1) s = 0.1;
550         dlambda    = (lambdau - lambda) / s;
551         lambda_new = 0.75*lambdal + 0.25*lambda;
552         if (lambda_new < (lambda - dlambda))
553           lambda_new = lambda - dlambda;
554         lambdau    = lambda;
555         ru         = r;
556         lambda     = lambda_new;
557         s          = (lambdau - lambdal) / (lambdau - lambda);
558       }
559     } else {
560       if (s >= 2.0) {
561         lambdal = lambda;
562         rl      = r;
563         s       = 1.0 - rl/ru;
564         dlambda = (lambdau - lambdal) / s;
565         lambda  = lambdau - dlambda;
566       } else {
567         s          = rl/r - 1.0;
568         if (s < 0.1) s = 0.1;
569         dlambda    = (lambda-lambdal) / s;
570         lambda_new = 0.75*lambdau + 0.25*lambda;
571         if (lambda_new > (lambda + dlambda))
572           lambda_new = lambda + dlambda;
573         lambdal    = lambda;
574         rl         = r;
575         lambda     = lambda_new;
576         s          = (lambdau - lambdal) / (lambdau-lambda);
577       }
578     }
579     r = phi(x, n, lambda, a, b, c, l, u);
580   }
581 
582   *lam_ext = lambda;
583   if (innerIter >= df->maxProjIter) {
584     PetscCall(PetscInfo(NULL,"WARNING: DaiFletcher max iterations\n"));
585   }
586   return innerIter;
587 }
588 
589 PetscErrorCode solve(TAO_DF *df)
590 {
591   PetscInt       i, j, innerIter, it, it2, luv, info;
592   PetscReal      gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
593   PetscReal      DELTAsv, ProdDELTAsv;
594   PetscReal      c, *tempQ;
595   PetscReal      *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
596   PetscReal      *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
597   PetscReal      *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
598   PetscReal      **Q = df->Q, *f = df->f, *t = df->t;
599   PetscInt       dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
600 
601   /* variables for the adaptive nonmonotone linesearch */
602   PetscInt    L, llast;
603   PetscReal   fr, fbest, fv, fc, fv0;
604 
605   c = BMRM_INFTY;
606 
607   DELTAsv = EPS_SV;
608   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
609   else  ProdDELTAsv = EPS_SV;
610 
611   for (i = 0; i < dim; i++)  tempv[i] = -x[i];
612 
613   lam_ext = 0.0;
614 
615   /* Project the initial solution */
616   project(dim, a, b, tempv, l, u, x, &lam_ext, df);
617 
618   /* Compute gradient
619      g = Q*x + f; */
620 
621   it = 0;
622   for (i = 0; i < dim; i++) {
623     if (PetscAbsReal(x[i]) > ProdDELTAsv) ipt[it++] = i;
624   }
625 
626   PetscCall(PetscArrayzero(t, dim));
627   for (i = 0; i < it; i++) {
628     tempQ = Q[ipt[i]];
629     for (j = 0; j < dim; j++) t[j] += (tempQ[j]*x[ipt[i]]);
630   }
631   for (i = 0; i < dim; i++) {
632     g[i] = t[i] + f[i];
633   }
634 
635   /* y = -(x_{k} - g_{k}) */
636   for (i = 0; i < dim; i++) {
637     y[i] = g[i] - x[i];
638   }
639 
640   /* Project x_{k} - g_{k} */
641   project(dim, a, b, y, l, u, tempv, &lam_ext, df);
642 
643   /* y = P(x_{k} - g_{k}) - x_{k} */
644   max = ALPHA_MIN;
645   for (i = 0; i < dim; i++) {
646     y[i] = tempv[i] - x[i];
647     if (PetscAbsReal(y[i]) > max) max = PetscAbsReal(y[i]);
648   }
649 
650   if (max < tol*1e-3) {
651     return 0;
652   }
653 
654   alpha = 1.0 / max;
655 
656   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
657   fv0   = 0.0;
658   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5*t[i] + f[i]);
659 
660   /* adaptive nonmonotone linesearch */
661   L     = 2;
662   fr    = ALPHA_MAX;
663   fbest = fv0;
664   fc    = fv0;
665   llast = 0;
666   akold = bkold = 0.0;
667 
668   /*     Iterator begins     */
669   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
670 
671     /* tempv = -(x_{k} - alpha*g_{k}) */
672     for (i = 0; i < dim; i++)  tempv[i] = alpha*g[i] - x[i];
673 
674     /* Project x_{k} - alpha*g_{k} */
675     project(dim, a, b, tempv, l, u, y, &lam_ext, df);
676 
677     /* gd = \inner{d_{k}}{g_{k}}
678         d = P(x_{k} - alpha*g_{k}) - x_{k}
679     */
680     gd = 0.0;
681     for (i = 0; i < dim; i++) {
682       d[i] = y[i] - x[i];
683       gd  += d[i] * g[i];
684     }
685 
686     /* Gradient computation  */
687 
688     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
689 
690     it = it2 = 0;
691     for (i = 0; i < dim; i++) {
692       if (PetscAbsReal(d[i]) > (ProdDELTAsv*1.0e-2)) ipt[it++]   = i;
693     }
694     for (i = 0; i < dim; i++) {
695       if (PetscAbsReal(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
696     }
697 
698     PetscCall(PetscArrayzero(Qd, dim));
699     /* compute Qd = Q*d */
700     if (it < it2) {
701       for (i = 0; i < it; i++) {
702         tempQ = Q[ipt[i]];
703         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
704       }
705     } else { /* compute Qd = Q*y-t */
706       for (i = 0; i < it2; i++) {
707         tempQ = Q[ipt2[i]];
708         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
709       }
710       for (j = 0; j < dim; j++) Qd[j] -= t[j];
711     }
712 
713     /* ak = inner{d_{k}}{d_{k}} */
714     ak = 0.0;
715     for (i = 0; i < dim; i++) ak += d[i] * d[i];
716 
717     bk = 0.0;
718     for (i = 0; i < dim; i++) bk += d[i]*Qd[i];
719 
720     if (bk > EPS*ak && gd < 0.0)  lamnew = -gd/bk;
721     else lamnew = 1.0;
722 
723     /* fv is computing f(x_{k} + d_{k}) */
724     fv = 0.0;
725     for (i = 0; i < dim; i++) {
726       xplus[i] = x[i] + d[i];
727       tplus[i] = t[i] + Qd[i];
728       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
729     }
730 
731     /* fr is fref */
732     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)) {
733       fv = 0.0;
734       for (i = 0; i < dim; i++) {
735         xplus[i] = x[i] + lamnew*d[i];
736         tplus[i] = t[i] + lamnew*Qd[i];
737         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
738       }
739     }
740 
741     for (i = 0; i < dim; i++) {
742       sk[i] = xplus[i] - x[i];
743       yk[i] = tplus[i] - t[i];
744       x[i]  = xplus[i];
745       t[i]  = tplus[i];
746       g[i]  = t[i] + f[i];
747     }
748 
749     /* update the line search control parameters */
750     if (fv < fbest) {
751       fbest = fv;
752       fc    = fv;
753       llast = 0;
754     } else {
755       fc = (fc > fv ? fc : fv);
756       llast++;
757       if (llast == L) {
758         fr    = fc;
759         fc    = fv;
760         llast = 0;
761       }
762     }
763 
764     ak = bk = 0.0;
765     for (i = 0; i < dim; i++) {
766       ak += sk[i] * sk[i];
767       bk += sk[i] * yk[i];
768     }
769 
770     if (bk <= EPS*ak) alpha = ALPHA_MAX;
771     else {
772       if (bkold < EPS*akold) alpha = ak/bk;
773       else alpha = (akold+ak)/(bkold+bk);
774 
775       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
776       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
777     }
778 
779     akold = ak;
780     bkold = bk;
781 
782     /* stopping criterion based on KKT conditions */
783     /* at optimal, gradient of lagrangian w.r.t. x is zero */
784 
785     bk = 0.0;
786     for (i = 0; i < dim; i++) bk +=  x[i] * x[i];
787 
788     if (PetscSqrtReal(ak) < tol*10 * PetscSqrtReal(bk)) {
789       it     = 0;
790       luv    = 0;
791       kktlam = 0.0;
792       for (i = 0; i < dim; i++) {
793         /* x[i] is active hence lagrange multipliers for box constraints
794                 are zero. The lagrange multiplier for ineq. const. is then
795                 defined as below
796         */
797         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)) {
798           ipt[it++] = i;
799           kktlam    = kktlam - a[i]*g[i];
800         } else  uv[luv++] = i;
801       }
802 
803       if (it == 0 && PetscSqrtReal(ak) < tol*0.5 * PetscSqrtReal(bk)) return 0;
804       else {
805         kktlam = kktlam/it;
806         info   = 1;
807         for (i = 0; i < it; i++) {
808           if (PetscAbsReal(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
809             info = 0;
810             break;
811           }
812         }
813         if (info == 1)  {
814           for (i = 0; i < luv; i++)  {
815             if (x[uv[i]] <= DELTAsv) {
816               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
817                      not be zero. So, the gradient without beta is > 0
818               */
819               if (g[uv[i]] + kktlam*a[uv[i]] < -tol) {
820                 info = 0;
821                 break;
822               }
823             } else {
824               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
825                      not be zero. So, the gradient without eta is < 0
826               */
827               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
828                 info = 0;
829                 break;
830               }
831             }
832           }
833         }
834 
835         if (info == 1) return 0;
836       }
837     }
838   }
839   return 0;
840 }
841