xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision aca0776feee4da889fbf4f9f3c60ccde70044ebc)
1 #include <../src/tao/unconstrained/impls/bmrm/bmrm.h>
2 
3 static PetscErrorCode init_df_solver(TAO_DF *);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF *);
5 static PetscErrorCode destroy_df_solver(TAO_DF *);
6 static PetscReal      phi(PetscReal *, PetscInt, PetscReal, PetscReal *, PetscReal, PetscReal *, PetscReal *, PetscReal *);
7 static PetscInt       project(PetscInt, PetscReal *, PetscReal, PetscReal *, PetscReal *, PetscReal *, PetscReal *, PetscReal *, TAO_DF *);
8 static PetscErrorCode solve(TAO_DF *);
9 
10 /*------------------------------------------------------------*/
11 /* The main solver function
12 
13    f = Remp(W)          This is what the user provides us from the application layer
14    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
15 
   Regularizer is assumed to be the L2 norm: lambda*0.5*W'W
17 */
18 
19 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
20 {
21   PetscFunctionBegin;
22   PetscCall(PetscNew(p));
23   PetscCall(VecDuplicate(X, &(*p)->V));
24   PetscCall(VecCopy(X, (*p)->V));
25   (*p)->next = NULL;
26   PetscFunctionReturn(PETSC_SUCCESS);
27 }
28 
29 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
30 {
31   Vec_Chain *p = head->next, *q;
32 
33   PetscFunctionBegin;
34   while (p) {
35     q = p->next;
36     PetscCall(VecDestroy(&p->V));
37     PetscCall(PetscFree(p));
38     p = q;
39   }
40   head->next = NULL;
41   PetscFunctionReturn(PETSC_SUCCESS);
42 }
43 
/* Bundle method for regularized risk minimization (BMRM).

   Each iteration: the subgradient of the user-supplied Remp(W) is gathered
   to rank 0, appended as a new cutting plane to the inner Dai-Fletcher dual
   QP, the QP is solved, and the new primal iterate W is scattered back to
   all ranks.  Convergence is monitored through the duality-gap style
   quantity epsilon = min_t J(w_t) - Jt*(alpha_t). */
static PetscErrorCode TaoSolve_BMRM(Tao tao)
{
  TAO_DF    df; /* inner Dai-Fletcher QP state; populated and used on rank 0 only */
  TAO_BMRM *bmrm = (TAO_BMRM *)tao->data;

  /* Values and pointers to parts of the optimization problem */
  PetscReal   f = 0.0;            /* Remp(W), supplied by the application */
  Vec         W = tao->solution;
  Vec         G = tao->gradient;  /* subgradient of Remp(W) */
  PetscReal   lambda;             /* regularizer weight */
  PetscReal   bt;                 /* offset of the new cutting plane */
  Vec_Chain   grad_list, *tail_glist, *pgrad; /* rank-0 list of gathered subgradients */
  PetscInt    i;
  PetscMPIInt rank;

  /* Used in converged criteria check */
  PetscReal reg;
  PetscReal jtwt = 0.0, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
  PetscReal innerSolverTol;
  MPI_Comm  comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)tao, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  lambda = bmrm->lambda;

  /* Check Stopping Condition */
  tao->step      = 1.0;
  max_jtwt       = -BMRM_INFTY;
  min_jw         = BMRM_INFTY;
  innerSolverTol = 1.0;
  epsilon        = 0.0;

  /* only rank 0 runs the inner QP solver and stores the cutting planes */
  if (rank == 0) {
    PetscCall(init_df_solver(&df));
    grad_list.next = NULL;
    tail_glist     = &grad_list;
  }

  /* NOTE(review): df is only initialized on rank 0 above; writing df.tol on
     every rank is harmless because df is read only inside rank==0 blocks */
  df.tol      = 1e-6;
  tao->reason = TAO_CONTINUE_ITERATING;

  /*-----------------Algorithm Begins------------------------*/
  /* make the scatter */
  PetscCall(VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w));
  PetscCall(VecAssemblyBegin(bmrm->local_w));
  PetscCall(VecAssemblyEnd(bmrm->local_w));

  /* NOTE: In application pass the sub-gradient of Remp(W) */
  PetscCall(TaoComputeObjectiveAndGradient(tao, W, &f, G));
  PetscCall(TaoLogConvergenceHistory(tao, f, 1.0, 0.0, tao->ksp_its));
  PetscCall(TaoMonitor(tao, tao->niter, f, 1.0, 0.0, tao->step));
  PetscUseTypeMethod(tao, convergencetest, tao->cnvP);

  while (tao->reason == TAO_CONTINUE_ITERATING) {
    /* Call general purpose update function */
    PetscTryTypeMethod(tao, update, tao->niter, tao->user_update);

    /* compute bt = Remp(Wt-1) - <Wt-1, At> */
    PetscCall(VecDot(W, G, &bt));
    bt = f - bt;

    /* First gather the gradient to the rank-0 node */
    PetscCall(VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD));

    /* Bring up the inner solver */
    if (rank == 0) {
      /* append the gathered subgradient as cutting plane tao->niter */
      PetscCall(ensure_df_space(tao->niter + 1, &df));
      PetscCall(make_grad_node(bmrm->local_w, &pgrad));
      tail_glist->next = pgrad;
      tail_glist       = pgrad;

      df.a[tao->niter] = 1.0;
      df.f[tao->niter] = -bt;
      df.u[tao->niter] = 1.0;
      df.l[tao->niter] = 0.0;

      /* set up the Q */
      /* fill the new row/column of the Gram matrix Q[i][j] = <A_i, A_j>/lambda */
      pgrad = grad_list.next;
      for (i = 0; i <= tao->niter; i++) {
        PetscCheck(pgrad, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Assert that there are at least tao->niter+1 pgrad available");
        PetscCall(VecDot(pgrad->V, bmrm->local_w, &reg));
        df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
        pgrad                                     = pgrad->next;
      }

      if (tao->niter > 0) {
        df.x[tao->niter] = 0.0;
        PetscCall(solve(&df));
      } else df.x[0] = 1.0; /* single cutting plane: dual solution is trivial */

      /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
      /* reconstruct w = -(1/lambda) * sum_i alpha_i A_i in bmrm->local_w */
      jtwt = 0.0;
      PetscCall(VecSet(bmrm->local_w, 0.0));
      pgrad = grad_list.next;
      for (i = 0; i <= tao->niter; i++) {
        jtwt -= df.x[i] * df.f[i];
        PetscCall(VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V));
        pgrad = pgrad->next;
      }

      PetscCall(VecNorm(bmrm->local_w, NORM_2, &reg));
      reg = 0.5 * lambda * reg * reg; /* regularizer value lambda*0.5*||w||^2 */
      jtwt -= reg;
    } /* end if rank == 0 */

    /* scatter the new W to all nodes */
    PetscCall(VecScatterBegin(bmrm->scatter, bmrm->local_w, W, INSERT_VALUES, SCATTER_REVERSE));
    PetscCall(VecScatterEnd(bmrm->scatter, bmrm->local_w, W, INSERT_VALUES, SCATTER_REVERSE));

    PetscCall(TaoComputeObjectiveAndGradient(tao, W, &f, G));

    /* rank 0 computed jtwt and reg; share them with every rank */
    PetscCallMPI(MPI_Bcast(&jtwt, 1, MPIU_REAL, 0, comm));
    PetscCallMPI(MPI_Bcast(&reg, 1, MPIU_REAL, 0, comm));

    jw = reg + f; /* J(w) = regularizer + Remp(w) */
    if (jw < min_jw) min_jw = jw;
    if (jtwt > max_jtwt) max_jtwt = jtwt;

    pre_epsilon = epsilon;
    epsilon     = min_jw - jtwt; /* gap used as the convergence measure */

    if (rank == 0) {
      /* anneal the inner solver tolerance down with the gap, floored at 1e-7 */
      if (innerSolverTol > epsilon) innerSolverTol = epsilon;
      else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;

      /* if the annealing doesn't work well, lower the inner solver tolerance */
      if (pre_epsilon < epsilon) innerSolverTol *= 0.2;

      df.tol = innerSolverTol * 0.5;
    }

    tao->niter++;
    PetscCall(TaoLogConvergenceHistory(tao, min_jw, epsilon, 0.0, tao->ksp_its));
    PetscCall(TaoMonitor(tao, tao->niter, min_jw, epsilon, 0.0, tao->step));
    PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
  }

  /* free all the memory */
  if (rank == 0) {
    PetscCall(destroy_grad_list(&grad_list));
    PetscCall(destroy_df_solver(&df));
  }

  PetscCall(VecDestroy(&bmrm->local_w));
  PetscCall(VecScatterDestroy(&bmrm->scatter));
  PetscFunctionReturn(PETSC_SUCCESS);
}
193 
194 /* ---------------------------------------------------------- */
195 
196 static PetscErrorCode TaoSetup_BMRM(Tao tao)
197 {
198   PetscFunctionBegin;
199   /* Allocate some arrays */
200   if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
201   PetscFunctionReturn(PETSC_SUCCESS);
202 }
203 
204 /*------------------------------------------------------------*/
/* Free the BMRM context attached to the Tao object.
   PetscFree() also resets tao->data to NULL after freeing it. */
static PetscErrorCode TaoDestroy_BMRM(Tao tao)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(tao->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
211 
212 static PetscErrorCode TaoSetFromOptions_BMRM(Tao tao, PetscOptionItems *PetscOptionsObject)
213 {
214   TAO_BMRM *bmrm = (TAO_BMRM *)tao->data;
215 
216   PetscFunctionBegin;
217   PetscOptionsHeadBegin(PetscOptionsObject, "BMRM for regularized risk minimization");
218   PetscCall(PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight", "", 100, &bmrm->lambda, NULL));
219   PetscOptionsHeadEnd();
220   PetscFunctionReturn(PETSC_SUCCESS);
221 }
222 
223 /*------------------------------------------------------------*/
224 static PetscErrorCode TaoView_BMRM(Tao tao, PetscViewer viewer)
225 {
226   PetscBool isascii;
227 
228   PetscFunctionBegin;
229   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
230   if (isascii) {
231     PetscCall(PetscViewerASCIIPushTab(viewer));
232     PetscCall(PetscViewerASCIIPopTab(viewer));
233   }
234   PetscFunctionReturn(PETSC_SUCCESS);
235 }
236 
237 /*------------------------------------------------------------*/
238 /*MC
239   TAOBMRM - bundle method for regularized risk minimization
240 
241   Options Database Keys:
. -tao_bmrm_lambda - regulariser weight
243 
244   Level: beginner
245 M*/
246 
247 PETSC_EXTERN PetscErrorCode TaoCreate_BMRM(Tao tao)
248 {
249   TAO_BMRM *bmrm;
250 
251   PetscFunctionBegin;
252   tao->ops->setup          = TaoSetup_BMRM;
253   tao->ops->solve          = TaoSolve_BMRM;
254   tao->ops->view           = TaoView_BMRM;
255   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
256   tao->ops->destroy        = TaoDestroy_BMRM;
257 
258   PetscCall(PetscNew(&bmrm));
259   bmrm->lambda = 1.0;
260   tao->data    = (void *)bmrm;
261 
262   /* Override default settings (unless already changed) */
263   if (!tao->max_it_changed) tao->max_it = 2000;
264   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
265   if (!tao->gatol_changed) tao->gatol = 1.0e-12;
266   if (!tao->grtol_changed) tao->grtol = 1.0e-12;
267 
268   PetscFunctionReturn(PETSC_SUCCESS);
269 }
270 
271 static PetscErrorCode init_df_solver(TAO_DF *df)
272 {
273   PetscInt i, n = INCRE_DIM;
274 
275   PetscFunctionBegin;
276   /* default values */
277   df->maxProjIter = 200;
278   df->maxPGMIter  = 300000;
279   df->b           = 1.0;
280 
281   /* memory space required by Dai-Fletcher */
282   df->cur_num_cp = n;
283   PetscCall(PetscMalloc1(n, &df->f));
284   PetscCall(PetscMalloc1(n, &df->a));
285   PetscCall(PetscMalloc1(n, &df->l));
286   PetscCall(PetscMalloc1(n, &df->u));
287   PetscCall(PetscMalloc1(n, &df->x));
288   PetscCall(PetscMalloc1(n, &df->Q));
289 
290   for (i = 0; i < n; i++) PetscCall(PetscMalloc1(n, &df->Q[i]));
291 
292   PetscCall(PetscMalloc1(n, &df->g));
293   PetscCall(PetscMalloc1(n, &df->y));
294   PetscCall(PetscMalloc1(n, &df->tempv));
295   PetscCall(PetscMalloc1(n, &df->d));
296   PetscCall(PetscMalloc1(n, &df->Qd));
297   PetscCall(PetscMalloc1(n, &df->t));
298   PetscCall(PetscMalloc1(n, &df->xplus));
299   PetscCall(PetscMalloc1(n, &df->tplus));
300   PetscCall(PetscMalloc1(n, &df->sk));
301   PetscCall(PetscMalloc1(n, &df->yk));
302 
303   PetscCall(PetscMalloc1(n, &df->ipt));
304   PetscCall(PetscMalloc1(n, &df->ipt2));
305   PetscCall(PetscMalloc1(n, &df->uv));
306   PetscFunctionReturn(PETSC_SUCCESS);
307 }
308 
309 static PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
310 {
311   PetscReal *tmp, **tmp_Q;
312   PetscInt   i, n, old_n;
313 
314   PetscFunctionBegin;
315   df->dim = dim;
316   if (dim <= df->cur_num_cp) PetscFunctionReturn(PETSC_SUCCESS);
317 
318   old_n = df->cur_num_cp;
319   df->cur_num_cp += INCRE_DIM;
320   n = df->cur_num_cp;
321 
322   /* memory space required by dai-fletcher */
323   PetscCall(PetscMalloc1(n, &tmp));
324   PetscCall(PetscArraycpy(tmp, df->f, old_n));
325   PetscCall(PetscFree(df->f));
326   df->f = tmp;
327 
328   PetscCall(PetscMalloc1(n, &tmp));
329   PetscCall(PetscArraycpy(tmp, df->a, old_n));
330   PetscCall(PetscFree(df->a));
331   df->a = tmp;
332 
333   PetscCall(PetscMalloc1(n, &tmp));
334   PetscCall(PetscArraycpy(tmp, df->l, old_n));
335   PetscCall(PetscFree(df->l));
336   df->l = tmp;
337 
338   PetscCall(PetscMalloc1(n, &tmp));
339   PetscCall(PetscArraycpy(tmp, df->u, old_n));
340   PetscCall(PetscFree(df->u));
341   df->u = tmp;
342 
343   PetscCall(PetscMalloc1(n, &tmp));
344   PetscCall(PetscArraycpy(tmp, df->x, old_n));
345   PetscCall(PetscFree(df->x));
346   df->x = tmp;
347 
348   PetscCall(PetscMalloc1(n, &tmp_Q));
349   for (i = 0; i < n; i++) {
350     PetscCall(PetscMalloc1(n, &tmp_Q[i]));
351     if (i < old_n) {
352       PetscCall(PetscArraycpy(tmp_Q[i], df->Q[i], old_n));
353       PetscCall(PetscFree(df->Q[i]));
354     }
355   }
356 
357   PetscCall(PetscFree(df->Q));
358   df->Q = tmp_Q;
359 
360   PetscCall(PetscFree(df->g));
361   PetscCall(PetscMalloc1(n, &df->g));
362 
363   PetscCall(PetscFree(df->y));
364   PetscCall(PetscMalloc1(n, &df->y));
365 
366   PetscCall(PetscFree(df->tempv));
367   PetscCall(PetscMalloc1(n, &df->tempv));
368 
369   PetscCall(PetscFree(df->d));
370   PetscCall(PetscMalloc1(n, &df->d));
371 
372   PetscCall(PetscFree(df->Qd));
373   PetscCall(PetscMalloc1(n, &df->Qd));
374 
375   PetscCall(PetscFree(df->t));
376   PetscCall(PetscMalloc1(n, &df->t));
377 
378   PetscCall(PetscFree(df->xplus));
379   PetscCall(PetscMalloc1(n, &df->xplus));
380 
381   PetscCall(PetscFree(df->tplus));
382   PetscCall(PetscMalloc1(n, &df->tplus));
383 
384   PetscCall(PetscFree(df->sk));
385   PetscCall(PetscMalloc1(n, &df->sk));
386 
387   PetscCall(PetscFree(df->yk));
388   PetscCall(PetscMalloc1(n, &df->yk));
389 
390   PetscCall(PetscFree(df->ipt));
391   PetscCall(PetscMalloc1(n, &df->ipt));
392 
393   PetscCall(PetscFree(df->ipt2));
394   PetscCall(PetscMalloc1(n, &df->ipt2));
395 
396   PetscCall(PetscFree(df->uv));
397   PetscCall(PetscMalloc1(n, &df->uv));
398   PetscFunctionReturn(PETSC_SUCCESS);
399 }
400 
401 static PetscErrorCode destroy_df_solver(TAO_DF *df)
402 {
403   PetscInt i;
404 
405   PetscFunctionBegin;
406   PetscCall(PetscFree(df->f));
407   PetscCall(PetscFree(df->a));
408   PetscCall(PetscFree(df->l));
409   PetscCall(PetscFree(df->u));
410   PetscCall(PetscFree(df->x));
411 
412   for (i = 0; i < df->cur_num_cp; i++) PetscCall(PetscFree(df->Q[i]));
413   PetscCall(PetscFree(df->Q));
414   PetscCall(PetscFree(df->ipt));
415   PetscCall(PetscFree(df->ipt2));
416   PetscCall(PetscFree(df->uv));
417   PetscCall(PetscFree(df->g));
418   PetscCall(PetscFree(df->y));
419   PetscCall(PetscFree(df->tempv));
420   PetscCall(PetscFree(df->d));
421   PetscCall(PetscFree(df->Qd));
422   PetscCall(PetscFree(df->t));
423   PetscCall(PetscFree(df->xplus));
424   PetscCall(PetscFree(df->tplus));
425   PetscCall(PetscFree(df->sk));
426   PetscCall(PetscFree(df->yk));
427   PetscFunctionReturn(PETSC_SUCCESS);
428 }
429 
430 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
431 static PetscReal phi(PetscReal *x, PetscInt n, PetscReal lambda, PetscReal *a, PetscReal b, PetscReal *c, PetscReal *l, PetscReal *u)
432 {
433   PetscReal r = 0.0;
434   PetscInt  i;
435 
436   for (i = 0; i < n; i++) {
437     x[i] = -c[i] + lambda * a[i];
438     if (x[i] > u[i]) x[i] = u[i];
439     else if (x[i] < l[i]) x[i] = l[i];
440     r += a[i] * x[i];
441   }
442   return r - b;
443 }
444 
/** Modified Dai-Fletcher QP projector solves the problem:
 *
 *      minimise  0.5*x'*x - c'*x
 *      subj to   a'*x = b
 *                l \leq x \leq u
 *
 *  \param c The point to be projected onto feasible set
 *
 *  Returns the number of secant iterations performed (0 on the trivial
 *  early exits) — NOT a PetscErrorCode, despite PETSC_SUCCESS being used
 *  as the zero value.  The multiplier found is written to *lam_ext.
 *
 *  Algorithm: root-find the monotone residual r(lambda) = phi(...) in two
 *  phases — first bracket the root by expanding dlambda geometrically, then
 *  refine with a safeguarded secant method.
 */
static PetscInt project(PetscInt n, PetscReal *a, PetscReal b, PetscReal *c, PetscReal *l, PetscReal *u, PetscReal *x, PetscReal *lam_ext, TAO_DF *df)
{
  PetscReal lambda, lambdal, lambdau, dlambda, lambda_new;
  PetscReal r, rl, ru, s;
  PetscInt  innerIter;
  PetscBool nonNegativeSlack = PETSC_FALSE; /* hard-wired: equality-constraint variant */

  *lam_ext  = 0;
  lambda    = 0;   /* multiplier estimate for the equality constraint */
  dlambda   = 0.5; /* initial bracketing step, grown geometrically below */
  innerIter = 1;

  /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
   *
   *  Optimality conditions for \phi:
   *
   *  1. lambda   <= 0
   *  2. r        <= 0
   *  3. r*lambda == 0
   */

  /* Bracketing Phase */
  r = phi(x, n, lambda, a, b, c, l, u);

  if (nonNegativeSlack) {
    /* inequality constraint, i.e., with \xi >= 0 constraint */
    if (r < TOL_R) return PETSC_SUCCESS;
  } else {
    /* equality constraint ,i.e., without \xi >= 0 constraint */
    if (PetscAbsReal(r) < TOL_R) return PETSC_SUCCESS;
  }

  if (r < 0.0) {
    /* root lies above lambda: expand upward until r changes sign */
    lambdal = lambda;
    rl      = r;
    lambda  = lambda + dlambda;
    r       = phi(x, n, lambda, a, b, c, l, u);
    while (r < 0.0 && dlambda < BMRM_INFTY) {
      lambdal = lambda;
      s       = rl / r - 1.0;
      if (s < 0.1) s = 0.1;
      dlambda = dlambda + dlambda / s; /* secant-informed step growth */
      lambda  = lambda + dlambda;
      rl      = r;
      r       = phi(x, n, lambda, a, b, c, l, u);
    }
    lambdau = lambda;
    ru      = r;
  } else {
    /* root lies below lambda: expand downward until r changes sign */
    lambdau = lambda;
    ru      = r;
    lambda  = lambda - dlambda;
    r       = phi(x, n, lambda, a, b, c, l, u);
    while (r > 0.0 && dlambda > -BMRM_INFTY) {
      lambdau = lambda;
      s       = ru / r - 1.0;
      if (s < 0.1) s = 0.1;
      dlambda = dlambda + dlambda / s;
      lambda  = lambda - dlambda;
      ru      = r;
      r       = phi(x, n, lambda, a, b, c, l, u);
    }
    lambdal = lambda;
    rl      = r;
  }

  PetscCheck(PetscAbsReal(dlambda) <= BMRM_INFTY, PETSC_COMM_SELF, PETSC_ERR_PLIB, "L2N2_DaiFletcherPGM detected Infeasible QP problem!");

  if (ru == 0) return innerIter;

  /* Secant Phase: refine lambda inside the bracket [lambdal, lambdau] */
  s       = 1.0 - rl / ru;
  dlambda = dlambda / s;
  lambda  = lambdau - dlambda;
  r       = phi(x, n, lambda, a, b, c, l, u);

  while (PetscAbsReal(r) > TOL_R && dlambda > TOL_LAM * (1.0 + PetscAbsReal(lambda)) && innerIter < df->maxProjIter) {
    innerIter++;
    if (r > 0.0) {
      /* new point is an upper bound; take a plain or safeguarded secant step */
      if (s <= 2.0) {
        lambdau = lambda;
        ru      = r;
        s       = 1.0 - rl / ru;
        dlambda = (lambdau - lambdal) / s;
        lambda  = lambdau - dlambda;
      } else {
        s = ru / r - 1.0;
        if (s < 0.1) s = 0.1;
        dlambda    = (lambdau - lambda) / s;
        lambda_new = 0.75 * lambdal + 0.25 * lambda; /* bisection-style safeguard */
        if (lambda_new < (lambda - dlambda)) lambda_new = lambda - dlambda;
        lambdau = lambda;
        ru      = r;
        lambda  = lambda_new;
        s       = (lambdau - lambdal) / (lambdau - lambda);
      }
    } else {
      /* new point is a lower bound; mirror of the branch above */
      if (s >= 2.0) {
        lambdal = lambda;
        rl      = r;
        s       = 1.0 - rl / ru;
        dlambda = (lambdau - lambdal) / s;
        lambda  = lambdau - dlambda;
      } else {
        s = rl / r - 1.0;
        if (s < 0.1) s = 0.1;
        dlambda    = (lambda - lambdal) / s;
        lambda_new = 0.75 * lambdau + 0.25 * lambda;
        if (lambda_new > (lambda + dlambda)) lambda_new = lambda + dlambda;
        lambdal = lambda;
        rl      = r;
        lambda  = lambda_new;
        s       = (lambdau - lambdal) / (lambdau - lambda);
      }
    }
    r = phi(x, n, lambda, a, b, c, l, u);
  }

  *lam_ext = lambda;
  if (innerIter >= df->maxProjIter) PetscCall(PetscInfo(NULL, "WARNING: DaiFletcher max iterations\n"));
  return innerIter;
}
575 
576 PetscErrorCode solve(TAO_DF *df)
577 {
578   PetscInt    i, j, innerIter, it, it2, luv, info;
579   PetscReal   gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam = 0.0, lam_ext;
580   PetscReal   DELTAsv, ProdDELTAsv;
581   PetscReal   c, *tempQ;
582   PetscReal  *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
583   PetscReal  *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
584   PetscReal  *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
585   PetscReal **Q = df->Q, *f = df->f, *t = df->t;
586   PetscInt    dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
587 
588   /* variables for the adaptive nonmonotone linesearch */
589   PetscInt  L, llast;
590   PetscReal fr, fbest, fv, fc, fv0;
591 
592   c = BMRM_INFTY;
593 
594   DELTAsv = EPS_SV;
595   if (tol <= 1.0e-5 || dim <= 20) ProdDELTAsv = 0.0F;
596   else ProdDELTAsv = EPS_SV;
597 
598   for (i = 0; i < dim; i++) tempv[i] = -x[i];
599 
600   lam_ext = 0.0;
601 
602   /* Project the initial solution */
603   project(dim, a, b, tempv, l, u, x, &lam_ext, df);
604 
605   /* Compute gradient
606      g = Q*x + f; */
607 
608   it = 0;
609   for (i = 0; i < dim; i++) {
610     if (PetscAbsReal(x[i]) > ProdDELTAsv) ipt[it++] = i;
611   }
612 
613   PetscCall(PetscArrayzero(t, dim));
614   for (i = 0; i < it; i++) {
615     tempQ = Q[ipt[i]];
616     for (j = 0; j < dim; j++) t[j] += (tempQ[j] * x[ipt[i]]);
617   }
618   for (i = 0; i < dim; i++) g[i] = t[i] + f[i];
619 
620   /* y = -(x_{k} - g_{k}) */
621   for (i = 0; i < dim; i++) y[i] = g[i] - x[i];
622 
623   /* Project x_{k} - g_{k} */
624   project(dim, a, b, y, l, u, tempv, &lam_ext, df);
625 
626   /* y = P(x_{k} - g_{k}) - x_{k} */
627   max = ALPHA_MIN;
628   for (i = 0; i < dim; i++) {
629     y[i] = tempv[i] - x[i];
630     if (PetscAbsReal(y[i]) > max) max = PetscAbsReal(y[i]);
631   }
632 
633   if (max < tol * 1e-3) return PETSC_SUCCESS;
634 
635   alpha = 1.0 / max;
636 
637   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
638   fv0 = 0.0;
639   for (i = 0; i < dim; i++) fv0 += x[i] * (0.5 * t[i] + f[i]);
640 
641   /* adaptive nonmonotone linesearch */
642   L     = 2;
643   fr    = ALPHA_MAX;
644   fbest = fv0;
645   fc    = fv0;
646   llast = 0;
647   akold = bkold = 0.0;
648 
649   /*     Iterator begins     */
650   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
651     /* tempv = -(x_{k} - alpha*g_{k}) */
652     for (i = 0; i < dim; i++) tempv[i] = alpha * g[i] - x[i];
653 
654     /* Project x_{k} - alpha*g_{k} */
655     project(dim, a, b, tempv, l, u, y, &lam_ext, df);
656 
657     /* gd = \inner{d_{k}}{g_{k}}
658         d = P(x_{k} - alpha*g_{k}) - x_{k}
659     */
660     gd = 0.0;
661     for (i = 0; i < dim; i++) {
662       d[i] = y[i] - x[i];
663       gd += d[i] * g[i];
664     }
665 
666     /* Gradient computation  */
667 
668     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
669 
670     it = it2 = 0;
671     for (i = 0; i < dim; i++) {
672       if (PetscAbsReal(d[i]) > (ProdDELTAsv * 1.0e-2)) ipt[it++] = i;
673     }
674     for (i = 0; i < dim; i++) {
675       if (PetscAbsReal(y[i]) > ProdDELTAsv) ipt2[it2++] = i;
676     }
677 
678     PetscCall(PetscArrayzero(Qd, dim));
679     /* compute Qd = Q*d */
680     if (it < it2) {
681       for (i = 0; i < it; i++) {
682         tempQ = Q[ipt[i]];
683         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * d[ipt[i]]);
684       }
685     } else { /* compute Qd = Q*y-t */
686       for (i = 0; i < it2; i++) {
687         tempQ = Q[ipt2[i]];
688         for (j = 0; j < dim; j++) Qd[j] += (tempQ[j] * y[ipt2[i]]);
689       }
690       for (j = 0; j < dim; j++) Qd[j] -= t[j];
691     }
692 
693     /* ak = inner{d_{k}}{d_{k}} */
694     ak = 0.0;
695     for (i = 0; i < dim; i++) ak += d[i] * d[i];
696 
697     bk = 0.0;
698     for (i = 0; i < dim; i++) bk += d[i] * Qd[i];
699 
700     if (bk > EPS * ak && gd < 0.0) lamnew = -gd / bk;
701     else lamnew = 1.0;
702 
703     /* fv is computing f(x_{k} + d_{k}) */
704     fv = 0.0;
705     for (i = 0; i < dim; i++) {
706       xplus[i] = x[i] + d[i];
707       tplus[i] = t[i] + Qd[i];
708       fv += xplus[i] * (0.5 * tplus[i] + f[i]);
709     }
710 
711     /* fr is fref */
712     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)) {
713       fv = 0.0;
714       for (i = 0; i < dim; i++) {
715         xplus[i] = x[i] + lamnew * d[i];
716         tplus[i] = t[i] + lamnew * Qd[i];
717         fv += xplus[i] * (0.5 * tplus[i] + f[i]);
718       }
719     }
720 
721     for (i = 0; i < dim; i++) {
722       sk[i] = xplus[i] - x[i];
723       yk[i] = tplus[i] - t[i];
724       x[i]  = xplus[i];
725       t[i]  = tplus[i];
726       g[i]  = t[i] + f[i];
727     }
728 
729     /* update the line search control parameters */
730     if (fv < fbest) {
731       fbest = fv;
732       fc    = fv;
733       llast = 0;
734     } else {
735       fc = (fc > fv ? fc : fv);
736       llast++;
737       if (llast == L) {
738         fr    = fc;
739         fc    = fv;
740         llast = 0;
741       }
742     }
743 
744     ak = bk = 0.0;
745     for (i = 0; i < dim; i++) {
746       ak += sk[i] * sk[i];
747       bk += sk[i] * yk[i];
748     }
749 
750     if (bk <= EPS * ak) alpha = ALPHA_MAX;
751     else {
752       if (bkold < EPS * akold) alpha = ak / bk;
753       else alpha = (akold + ak) / (bkold + bk);
754 
755       if (alpha > ALPHA_MAX) alpha = ALPHA_MAX;
756       else if (alpha < ALPHA_MIN) alpha = ALPHA_MIN;
757     }
758 
759     akold = ak;
760     bkold = bk;
761 
762     /* stopping criterion based on KKT conditions */
763     /* at optimal, gradient of lagrangian w.r.t. x is zero */
764 
765     bk = 0.0;
766     for (i = 0; i < dim; i++) bk += x[i] * x[i];
767 
768     if (PetscSqrtReal(ak) < tol * 10 * PetscSqrtReal(bk)) {
769       it     = 0;
770       luv    = 0;
771       kktlam = 0.0;
772       for (i = 0; i < dim; i++) {
773         /* x[i] is active hence lagrange multipliers for box constraints
774                 are zero. The lagrange multiplier for ineq. const. is then
775                 defined as below
776         */
777         if ((x[i] > DELTAsv) && (x[i] < c - DELTAsv)) {
778           ipt[it++] = i;
779           kktlam    = kktlam - a[i] * g[i];
780         } else uv[luv++] = i;
781       }
782 
783       if (it == 0 && PetscSqrtReal(ak) < tol * 0.5 * PetscSqrtReal(bk)) return PETSC_SUCCESS;
784       else {
785         kktlam = kktlam / it;
786         info   = 1;
787         for (i = 0; i < it; i++) {
788           if (PetscAbsReal(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
789             info = 0;
790             break;
791           }
792         }
793         if (info == 1) {
794           for (i = 0; i < luv; i++) {
795             if (x[uv[i]] <= DELTAsv) {
796               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
797                      not be zero. So, the gradient without beta is > 0
798               */
799               if (g[uv[i]] + kktlam * a[uv[i]] < -tol) {
800                 info = 0;
801                 break;
802               }
803             } else {
804               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
805                      not be zero. So, the gradient without eta is < 0
806               */
807               if (g[uv[i]] + kktlam * a[uv[i]] > tol) {
808                 info = 0;
809                 break;
810               }
811             }
812           }
813         }
814 
815         if (info == 1) return PETSC_SUCCESS;
816       }
817     }
818   }
819   return PETSC_SUCCESS;
820 }
821