xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision 87f595a510d358797c1cc8101e6f6c6930cb072f)
1 #include "bmrm.h"
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,
7 		     PetscReal,PetscReal*,PetscReal*,PetscReal*);
8 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,
9 			PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
10 
11 static PetscErrorCode solve(TAO_DF*);
12 
13 
14 /*------------------------------------------------------------*/
15 /* The main solver function
16 
17    f = Remp(W)		This is what the user provides us from the application layer
18    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
19 
20    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
21 */
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "make_grad_node"
25 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
26 {
27   PetscErrorCode ierr;
28 
29   PetscFunctionBegin;
30 
31   ierr = PetscMalloc(sizeof(Vec_Chain), p); CHKERRQ(ierr);
32   ierr = VecDuplicate(X, &(*p)->V); CHKERRQ(ierr);
33   ierr = VecCopy(X, (*p)->V); CHKERRQ(ierr);
34   (*p)->next = NULL;
35 
36   PetscFunctionReturn(0);
37 }
38 
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "destroy_grad_list"
42 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
43 {
44   PetscErrorCode ierr;
45   Vec_Chain *p = head->next, *q;
46 
47   PetscFunctionBegin;
48   while(p) {
49     q = p->next;
50     ierr = VecDestroy(&p->V); CHKERRQ(ierr);
51     ierr = PetscFree(p); CHKERRQ(ierr);
52     p = q;
53   }
54   head->next = NULL;
55 
56   PetscFunctionReturn(0);
57 }
58 
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "TaoSolve_BMRM"
62 static PetscErrorCode TaoSolve_BMRM(TaoSolver tao)
63 {
64   PetscErrorCode ierr;
65   TaoSolverTerminationReason reason;
66   TAO_DF df;
67   TAO_BMRM *bmrm = (TAO_BMRM*)tao->data;
68 
69   /* Values and pointers to parts of the optimization problem */
70   PetscReal f = 0.0;
71   Vec W = tao->solution;
72   Vec G = tao->gradient;
73   PetscReal lambda;
74 
75   PetscReal bt;
76   Vec_Chain grad_list, *tail_glist, *pgrad;
77 
78   PetscInt iter = 0;
79   PetscInt i;
80   PetscMPIInt rank;
81 
82   /* Used in termination criteria check */
83   PetscReal reg;
84   PetscReal jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
85   PetscReal innerSolverTol;
86 
87   PetscFunctionBegin;
88 
89   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank); CHKERRQ(ierr);
90   lambda = bmrm->lambda;
91 
92   /* Check Stopping Condition */
93   tao->step = 1.0;
94   max_jtwt = -BMRM_INFTY;
95   min_jw = BMRM_INFTY;
96   innerSolverTol = 1.0;
97   epsilon = 0.0;
98 
99   if (rank == 0) {
100     ierr = init_df_solver(&df); CHKERRQ(ierr);
101     grad_list.next = NULL;
102     tail_glist = &grad_list;
103   }
104 
105   df.tol = 1e-6;
106   reason = TAO_CONTINUE_ITERATING;
107 
108   /*-----------------Algorithm Begins------------------------*/
109   /* make the scatter */
110   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w); CHKERRQ(ierr);
111   ierr = VecAssemblyBegin(bmrm->local_w); CHKERRQ(ierr);
112   ierr = VecAssemblyEnd(bmrm->local_w); CHKERRQ(ierr);
113 
114 	/* NOTE: In application pass the sub-gradient of Remp(W) */
115   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G); CHKERRQ(ierr);
116   ierr = TaoMonitor(tao,iter,f,1.0,0.0,tao->step,&reason); CHKERRQ(ierr);
117   while (reason == TAO_CONTINUE_ITERATING) {
118     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
119     ierr = VecDot(W, G, &bt); CHKERRQ(ierr);
120     bt = f - bt;
121 
122     /* First gather the gradient to the master node */
123     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr);
124     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr);
125 
126     /* Bring up the inner solver */
127     if (rank == 0) {
128       ierr = ensure_df_space(iter+1, &df);  CHKERRQ(ierr);
129       ierr = make_grad_node(bmrm->local_w, &pgrad); CHKERRQ(ierr);
130       tail_glist->next = pgrad;
131       tail_glist = pgrad;
132 
133       df.a[iter] = 1.0;
134       df.f[iter] = -bt;
135       df.u[iter] = 1.0;
136       df.l[iter] = 0.0;
137 
138       /* set up the Q */
139       pgrad = grad_list.next;
140       for (i=0; i<=iter; i++) {
141         ierr = VecDot(pgrad->V, bmrm->local_w, &reg); CHKERRQ(ierr);
142         df.Q[i][iter] = df.Q[iter][i] = reg / lambda;
143         pgrad = pgrad->next;
144       }
145 
146       if (iter > 0) {
147         df.x[iter] = 0.0;
148         ierr = solve(&df);  CHKERRQ(ierr);
149       }
150       else
151 	df.x[0] = 1.0;
152 
153       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
154       jtwt = 0.0;
155       ierr = VecSet(bmrm->local_w, 0.0);  CHKERRQ(ierr);
156       pgrad = grad_list.next;
157       for (i=0; i<=iter; i++) {
158         jtwt -= df.x[i] * df.f[i];
159         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V);  CHKERRQ(ierr);
160         pgrad = pgrad->next;
161       }
162 
163       ierr = VecNorm(bmrm->local_w, NORM_2, &reg);  CHKERRQ(ierr);
164       reg = 0.5*lambda*reg*reg;
165       jtwt -= reg;
166     } /* end if rank == 0 */
167 
168     /* scatter the new W to all nodes */
169     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE); CHKERRQ(ierr);
170     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE); CHKERRQ(ierr);
171 
172     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G); CHKERRQ(ierr);
173 
174     MPI_Bcast(&jtwt,1,MPI_DOUBLE,0,PETSC_COMM_WORLD);
175     MPI_Bcast(&reg,1,MPI_DOUBLE,0,PETSC_COMM_WORLD);
176 
177     jw = reg + f;					/* J(w) = regularizer + Remp(w) */
178     if (jw < min_jw)
179       min_jw = jw;
180     if (jtwt > max_jtwt)
181       max_jtwt = jtwt;
182 
183     pre_epsilon = epsilon;
184     epsilon = min_jw - jtwt;
185 
186     if (rank == 0) {
187       if (innerSolverTol > epsilon)
188         innerSolverTol = epsilon;
189       else if (innerSolverTol < 1e-7)
190         innerSolverTol = 1e-7;
191 
192       /* if the annealing doesn't work well, lower the inner solver tolerance */
193       if(pre_epsilon < epsilon)
194         innerSolverTol *= 0.2;
195 
196       df.tol = innerSolverTol*0.5;
197     }
198 
199     iter++;
200     ierr = TaoMonitor(tao,iter,min_jw,epsilon,0.0,tao->step,&reason); CHKERRQ(ierr);
201   }
202 
203   /* free all the memory */
204   if (rank == 0) {
205     ierr = destroy_grad_list(&grad_list); 	CHKERRQ(ierr);
206     ierr = destroy_df_solver(&df); CHKERRQ(ierr);
207   }
208 
209   ierr = VecDestroy(&bmrm->local_w); CHKERRQ(ierr);
210   ierr = VecScatterDestroy(&bmrm->scatter); CHKERRQ(ierr);
211 
212   PetscFunctionReturn(0);
213 }
214 
215 
216 /* ---------------------------------------------------------- */
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "TaoSetup_BMRM"
220 static PetscErrorCode TaoSetup_BMRM(TaoSolver tao) {
221 
222   PetscErrorCode      ierr;
223 
224   PetscFunctionBegin;
225 
226   /* Allocate some arrays */
227   if (!tao->gradient) {
228     ierr = VecDuplicate(tao->solution, &tao->gradient);    CHKERRQ(ierr);
229   }
230 
231   PetscFunctionReturn(0);
232 }
233 
234 
235 /*------------------------------------------------------------*/
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "TaoDestroy_BMRM"
239 static PetscErrorCode TaoDestroy_BMRM(TaoSolver tao)
240 {
241   PetscErrorCode ierr;
242 
243   PetscFunctionBegin;
244   ierr = PetscFree(tao->data); CHKERRQ(ierr);
245   PetscFunctionReturn(0);
246 }
247 
248 
249 #undef __FUNCT__
250 #define __FUNCT__ "TaoSetFromOptions_BMRM"
251 static PetscErrorCode TaoSetFromOptions_BMRM(TaoSolver tao)
252 {
253   PetscErrorCode ierr;
254   TAO_BMRM*      bmrm = (TAO_BMRM*)tao->data;
255   PetscBool      flg;
256 
257   PetscFunctionBegin;
258   ierr = PetscOptionsHead("BMRM for regularized risk minimization");CHKERRQ(ierr);
259   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,&flg);  CHKERRQ(ierr);
260   ierr = PetscOptionsTail();CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 
265 /*------------------------------------------------------------*/
266 #undef __FUNCT__
267 #define __FUNCT__ "TaoView_BMRM"
268 static PetscErrorCode TaoView_BMRM(TaoSolver tao, PetscViewer viewer)
269 {
270   PetscBool           isascii;
271   PetscErrorCode      ierr;
272 
273   PetscFunctionBegin;
274   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
275   if (isascii) {
276     ierr = PetscViewerASCIIPushTab(viewer); CHKERRQ(ierr);
277     ierr = PetscViewerASCIIPopTab(viewer); CHKERRQ(ierr);
278   }
279   else{
280     SETERRQ1(((PetscObject)tao)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for TAO BMRM",((PetscObject)viewer)->type_name);
281   }
282 
283   PetscFunctionReturn(0);
284 }
285 
286 
287 /*------------------------------------------------------------*/
288 EXTERN_C_BEGIN
289 #undef __FUNCT__
290 #define __FUNCT__ "TaoCreate_BMRM"
291 PetscErrorCode TaoCreate_BMRM(TaoSolver tao)
292 {
293   TAO_BMRM *bmrm;
294   PetscErrorCode      ierr;
295 
296   PetscFunctionBegin;
297   tao->ops->setup = TaoSetup_BMRM;
298   tao->ops->solve = TaoSolve_BMRM;
299   tao->ops->view  = TaoView_BMRM;
300   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
301   tao->ops->destroy = TaoDestroy_BMRM;
302 
303   ierr = PetscNewLog(tao,&bmrm); CHKERRQ(ierr);
304   bmrm->lambda = 1.0;
305   tao->data = (void*)bmrm;
306 
307   /* Note: May need to be tuned! */
308   tao->max_it = 2048;
309   tao->max_funcs = 300000;
310   tao->fatol = 1e-20;
311   tao->frtol = 1e-25;
312   tao->gatol = 1e-25;
313   tao->grtol = 1e-25;
314 
315   PetscFunctionReturn(0);
316 }
317 EXTERN_C_END
318 
319 
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "init_df_solver"
323 PetscErrorCode init_df_solver(TAO_DF *df)
324 {
325   PetscInt i, n = INCRE_DIM;
326   PetscErrorCode ierr;
327 
328   PetscFunctionBegin;
329 
330   /* default values */
331   df->maxProjIter = 200;
332   df->maxPGMIter = 300000;
333   df->b = 1.0;
334 
335   /* memory space required by Dai-Fletcher */
336   df->cur_num_cp = n;
337   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->f);  CHKERRQ(ierr);
338   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->a);  CHKERRQ(ierr);
339   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->l);  CHKERRQ(ierr);
340   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->u);  CHKERRQ(ierr);
341   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->x);  CHKERRQ(ierr);
342   ierr = PetscMalloc(sizeof(PetscReal*)*n, &df->Q);  CHKERRQ(ierr);
343 
344   for (i = 0; i < n; i ++) {
345     ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Q[i]);  CHKERRQ(ierr);
346   }
347 
348   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->g);  CHKERRQ(ierr);
349   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->y);  CHKERRQ(ierr);
350   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tempv);  CHKERRQ(ierr);
351   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->d);  CHKERRQ(ierr);
352   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Qd);  CHKERRQ(ierr);
353   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->t);  CHKERRQ(ierr);
354   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->xplus);  CHKERRQ(ierr);
355   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tplus);  CHKERRQ(ierr);
356   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->sk);  CHKERRQ(ierr);
357   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->yk);  CHKERRQ(ierr);
358 
359   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt);  CHKERRQ(ierr);
360   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt2);  CHKERRQ(ierr);
361   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->uv);  CHKERRQ(ierr);
362 
363   PetscFunctionReturn(0);
364 }
365 
366 
367 #undef __FUNCT__
368 #define __FUNCT__ "ensure_df_space"
369 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
370 {
371   PetscErrorCode ierr;
372   PetscReal *tmp, **tmp_Q;
373   PetscInt i, n, old_n;
374 
375   df->dim = dim;
376   if (dim <= df->cur_num_cp)
377     return 0;
378 
379   PetscFunctionBegin;
380 
381   old_n = df->cur_num_cp;
382   df->cur_num_cp += INCRE_DIM;
383   n = df->cur_num_cp;
384 
385   /* memory space required by dai-fletcher */
386   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp);  CHKERRQ(ierr);
387   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
388   ierr = PetscFree(df->f);  CHKERRQ(ierr);
389   df->f = tmp;
390 
391   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp);  CHKERRQ(ierr);
392   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
393   ierr = PetscFree(df->a);  CHKERRQ(ierr);
394   df->a = tmp;
395 
396   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp);  CHKERRQ(ierr);
397   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
398   ierr = PetscFree(df->l);  CHKERRQ(ierr);
399   df->l = tmp;
400 
401   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp);  CHKERRQ(ierr);
402   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
403   ierr = PetscFree(df->u);  CHKERRQ(ierr);
404   df->u = tmp;
405 
406   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp);  CHKERRQ(ierr);
407   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
408   ierr = PetscFree(df->x);  CHKERRQ(ierr);
409   df->x = tmp;
410 
411   ierr = PetscMalloc(sizeof(PetscReal*)*n, &tmp_Q);  CHKERRQ(ierr);
412   for (i = 0; i < n; i ++)
413   {
414     ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp_Q[i]);  CHKERRQ(ierr);
415     if (i < old_n)
416     {
417       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
418       ierr = PetscFree(df->Q[i]);  CHKERRQ(ierr);
419     }
420   }
421 
422   ierr = PetscFree(df->Q);  CHKERRQ(ierr);
423   df->Q = tmp_Q;
424 
425   ierr = PetscFree(df->g);  CHKERRQ(ierr);
426   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->g);  CHKERRQ(ierr);
427 
428   ierr = PetscFree(df->y);  CHKERRQ(ierr);
429   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->y);  CHKERRQ(ierr);
430 
431   ierr = PetscFree(df->tempv);  CHKERRQ(ierr);
432   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tempv);  CHKERRQ(ierr);
433 
434   ierr = PetscFree(df->d);  CHKERRQ(ierr);
435   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->d);  CHKERRQ(ierr);
436 
437   ierr = PetscFree(df->Qd);  CHKERRQ(ierr);
438   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Qd);  CHKERRQ(ierr);
439 
440   ierr = PetscFree(df->t);  CHKERRQ(ierr);
441   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->t);  CHKERRQ(ierr);
442 
443   ierr = PetscFree(df->xplus);  CHKERRQ(ierr);
444   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->xplus);  CHKERRQ(ierr);
445 
446   ierr = PetscFree(df->tplus);  CHKERRQ(ierr);
447   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tplus);  CHKERRQ(ierr);
448 
449   ierr = PetscFree(df->sk);  CHKERRQ(ierr);
450   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->sk);  CHKERRQ(ierr);
451 
452   ierr = PetscFree(df->yk);  CHKERRQ(ierr);
453   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->yk);  CHKERRQ(ierr);
454 
455   ierr = PetscFree(df->ipt);  CHKERRQ(ierr);
456   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt);  CHKERRQ(ierr);
457 
458   ierr = PetscFree(df->ipt2);  CHKERRQ(ierr);
459   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt2);  CHKERRQ(ierr);
460 
461   ierr = PetscFree(df->uv);  CHKERRQ(ierr);
462   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->uv);  CHKERRQ(ierr);
463 
464   PetscFunctionReturn(0);
465 }
466 
467 
468 #undef __FUNCT__
469 #define __FUNCT__ "destroy_df_solver"
470 PetscErrorCode destroy_df_solver(TAO_DF *df)
471 {
472   PetscErrorCode ierr;
473   PetscInt       i;
474 
475   PetscFunctionBegin;
476   ierr = PetscFree(df->f);  CHKERRQ(ierr);
477   ierr = PetscFree(df->a);  CHKERRQ(ierr);
478   ierr = PetscFree(df->l);  CHKERRQ(ierr);
479   ierr = PetscFree(df->u);  CHKERRQ(ierr);
480   ierr = PetscFree(df->x);  CHKERRQ(ierr);
481 
482   for (i = 0; i < df->cur_num_cp; i ++) {
483     ierr = PetscFree(df->Q[i]);  CHKERRQ(ierr);
484   }
485   ierr = PetscFree(df->Q);  CHKERRQ(ierr);
486   ierr = PetscFree(df->ipt);  CHKERRQ(ierr);
487   ierr = PetscFree(df->ipt2);  CHKERRQ(ierr);
488   ierr = PetscFree(df->uv);  CHKERRQ(ierr);
489   ierr = PetscFree(df->g);  CHKERRQ(ierr);
490   ierr = PetscFree(df->y);  CHKERRQ(ierr);
491   ierr = PetscFree(df->tempv);  CHKERRQ(ierr);
492   ierr = PetscFree(df->d);  CHKERRQ(ierr);
493   ierr = PetscFree(df->Qd);  CHKERRQ(ierr);
494   ierr = PetscFree(df->t);  CHKERRQ(ierr);
495   ierr = PetscFree(df->xplus);  CHKERRQ(ierr);
496   ierr = PetscFree(df->tplus);  CHKERRQ(ierr);
497   ierr = PetscFree(df->sk);  CHKERRQ(ierr);
498   ierr = PetscFree(df->yk);  CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 
503 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
504 #undef __FUNCT__
505 #define __FUNCT__ "phi"
506 PetscReal phi(PetscReal *x,PetscInt n,PetscReal lambda,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u)
507 {
508   PetscReal r = 0.0;
509   PetscInt  i;
510 
511   for (i = 0; i < n; i++){
512     x[i] = -c[i] + lambda*a[i];
513     if (x[i] > u[i])     x[i] = u[i];
514     else if(x[i] < l[i]) x[i] = l[i];
515     r += a[i]*x[i];
516   }
517   return r - b;
518 }
519 
520 /** Modified Dai-Fletcher QP projector solves the problem:
521  *
522  *      minimise  0.5*x'*x - c'*x
523  *      subj to   a'*x = b
524  *                l \leq x \leq u
525  *
526  *  \param c The point to be projected onto feasible set
527  */
528 #undef __FUNCT__
529 #define __FUNCT__ "project"
530 PetscInt project(PetscInt n,PetscReal *a,PetscReal b,PetscReal *c,PetscReal *l,PetscReal *u,PetscReal *x,PetscReal *lam_ext,TAO_DF *df)
531 {
532   PetscReal lambda, lambdal, lambdau, dlambda, lambda_new;
533   PetscReal r, rl, ru, s;
534   PetscInt  innerIter;
535   PetscBool nonNegativeSlack = PETSC_FALSE;
536 
537   *lam_ext = 0;
538   lambda  = 0;
539   dlambda = 0.5;
540   innerIter = 1;
541 
542   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
543    *
544    *  Optimality conditions for \phi:
545    *
546    *  1. lambda   <= 0
547    *  2. r        <= 0
548    *  3. r*lambda == 0
549    */
550 
551   /* Bracketing Phase */
552   r = phi(x, n, lambda, a, b, c, l, u);
553 
554   if(nonNegativeSlack) {
555     /* inequality constraint, i.e., with \xi >= 0 constraint */
556     if (r < TOL_R)
557       return 0;
558   } else  {
559     /* equality constraint ,i.e., without \xi >= 0 constraint */
560     if (fabs(r) < TOL_R)
561       return 0;
562   }
563 
564   if (r < 0.0){
565     lambdal = lambda;
566     rl      = r;
567     lambda  = lambda + dlambda;
568     r       = phi(x, n, lambda, a, b, c, l, u);
569     while (r < 0.0 && dlambda < BMRM_INFTY)  {
570       lambdal = lambda;
571       s       = rl/r - 1.0;
572       if (s < 0.1) s = 0.1;
573       dlambda = dlambda + dlambda/s;
574       lambda  = lambda + dlambda;
575       rl      = r;
576       r       = phi(x, n, lambda, a, b, c, l, u);
577     }
578     lambdau = lambda;
579     ru      = r;
580   } else {
581     lambdau = lambda;
582     ru      = r;
583     lambda  = lambda - dlambda;
584     r       = phi(x, n, lambda, a, b, c, l, u);
585     while (r > 0.0 && dlambda > -BMRM_INFTY) {
586       lambdau = lambda;
587       s       = ru/r - 1.0;
588       if (s < 0.1) s = 0.1;
589       dlambda = dlambda + dlambda/s;
590       lambda  = lambda - dlambda;
591       ru      = r;
592       r       = phi(x, n, lambda, a, b, c, l, u);
593     }
594     lambdal = lambda;
595     rl      = r;
596   }
597 
598   if(fabs(dlambda) > BMRM_INFTY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"L2N2_DaiFletcherPGM detected Infeasible QP problem!");
599 
600   if(ru == 0){
601     lambda = lambdau;
602     r = phi(x, n, lambda, a, b, c, l, u);
603     return innerIter;
604   }
605 
606   /* Secant Phase */
607   s       = 1.0 - rl/ru;
608   dlambda = dlambda/s;
609   lambda  = lambdau - dlambda;
610   r       = phi(x, n, lambda, a, b, c, l, u);
611 
612   while (fabs(r) > TOL_R
613          && dlambda > TOL_LAM * (1.0 + fabs(lambda))
614          && innerIter < df->maxProjIter){
615     innerIter++;
616     if (r > 0.0){
617       if (s <= 2.0){
618         lambdau = lambda;
619         ru      = r;
620         s       = 1.0 - rl/ru;
621         dlambda = (lambdau - lambdal) / s;
622         lambda  = lambdau - dlambda;
623       }
624       else {
625         s          = ru/r-1.0;
626         if (s < 0.1) s = 0.1;
627         dlambda    = (lambdau - lambda) / s;
628         lambda_new = 0.75*lambdal + 0.25*lambda;
629         if (lambda_new < (lambda - dlambda))
630           lambda_new = lambda - dlambda;
631         lambdau    = lambda;
632         ru         = r;
633         lambda     = lambda_new;
634         s          = (lambdau - lambdal) / (lambdau - lambda);
635       }
636     }
637     else {
638       if (s >= 2.0){
639         lambdal = lambda;
640         rl      = r;
641         s       = 1.0 - rl/ru;
642         dlambda = (lambdau - lambdal) / s;
643         lambda  = lambdau - dlambda;
644       }
645       else {
646         s          = rl/r - 1.0;
647         if (s < 0.1) s = 0.1;
648         dlambda    = (lambda-lambdal) / s;
649         lambda_new = 0.75*lambdau + 0.25*lambda;
650         if (lambda_new > (lambda + dlambda))
651           lambda_new = lambda + dlambda;
652         lambdal    = lambda;
653         rl         = r;
654         lambda     = lambda_new;
655         s          = (lambdau - lambdal) / (lambdau-lambda);
656       }
657     }
658     r = phi(x, n, lambda, a, b, c, l, u);
659   }
660 
661   *lam_ext = lambda;
662   if(innerIter >= df->maxProjIter)
663     PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");
664 
665   return innerIter;
666 }
667 
668 
669 #undef __FUNCT__
670 #define __FUNCT__ "solve"
671 PetscErrorCode solve(TAO_DF *df)
672 {
673   PetscErrorCode ierr;
674   PetscInt    i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
675   PetscReal gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
676   PetscReal DELTAsv, ProdDELTAsv;
677   PetscReal c, *tempQ;
678   PetscReal *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
679   PetscReal *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
680   PetscReal *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
681   PetscReal **Q = df->Q, *f = df->f, *t = df->t;
682   PetscInt dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
683 
684   /*** variables for the adaptive nonmonotone linesearch ***/
685   PetscInt    L, llast;
686   PetscReal fr, fbest, fv, fc, fv0;
687   c = BMRM_INFTY;
688 
689   DELTAsv = EPS_SV;
690   if (tol <= 1.0e-5 || dim <= 20)
691     ProdDELTAsv = 0.0F;
692   else
693     ProdDELTAsv = EPS_SV;
694 
695   for (i = 0; i < dim; i++)
696     tempv[i] = -x[i];
697 
698   lam_ext = 0.0;
699 
700   /* Project the initial solution */
701   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
702 
703   /* Compute gradient
704      g = Q*x + f; */
705 
706   it = 0;
707   for (i = 0; i < dim; i++)
708     if (fabs(x[i]) > ProdDELTAsv)
709       ipt[it++] = i;
710 
711   ierr = PetscMemzero(t, dim*sizeof(PetscReal));  CHKERRQ(ierr);
712   for (i = 0; i < it; i++){
713     tempQ = Q[ipt[i]];
714     for (j = 0; j < dim; j++)
715       t[j] += (tempQ[j]*x[ipt[i]]);
716   }
717   for (i = 0; i < dim; i++){
718     g[i] = t[i] + f[i];
719   }
720 
721 
722   /* y = -(x_{k} - g_{k}) */
723   for (i = 0; i < dim; i++){
724     y[i] = g[i] - x[i];
725   }
726 
727   /* Project x_{k} - g_{k} */
728   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
729 
730   /* y = P(x_{k} - g_{k}) - x_{k} */
731   max = ALPHA_MIN;
732   for (i = 0; i < dim; i++){
733     y[i] = tempv[i] - x[i];
734     if (fabs(y[i]) > max)
735       max = fabs(y[i]);
736   }
737 
738   if (max < tol*1e-3){
739     lscount = 0;
740     innerIter    = 0;
741     return 0;
742   }
743 
744   alpha = 1.0 / max;
745 
746   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
747   fv0   = 0.0;
748   for (i = 0; i < dim; i++)
749     fv0 += x[i] * (0.5*t[i] + f[i]);
750 
751   /*** adaptive nonmonotone linesearch ***/
752   L     = 2;
753   fr    = ALPHA_MAX;
754   fbest = fv0;
755   fc    = fv0;
756   llast = 0;
757   akold = bkold = 0.0;
758 
759   /***      Iterator begins     ***/
760   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
761 
762     /* tempv = -(x_{k} - alpha*g_{k}) */
763     for (i = 0; i < dim; i++)
764       tempv[i] = alpha*g[i] - x[i];
765 
766     /* Project x_{k} - alpha*g_{k} */
767     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
768 
769 
770     /* gd = \inner{d_{k}}{g_{k}}
771         d = P(x_{k} - alpha*g_{k}) - x_{k}
772     */
773     gd = 0.0;
774     for (i = 0; i < dim; i++){
775       d[i] = y[i] - x[i];
776       gd  += d[i] * g[i];
777     }
778 
779     /* Gradient computation  */
780 
781     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
782 
783     it = it2 = 0;
784     for (i = 0; i < dim; i++)
785       if (fabs(d[i]) > (ProdDELTAsv*1.0e-2))
786         ipt[it++]   = i;
787     for (i = 0; i < dim; i++)
788       if (fabs(y[i]) > ProdDELTAsv)
789         ipt2[it2++] = i;
790 
791     ierr = PetscMemzero(Qd, dim*sizeof(PetscReal));  CHKERRQ(ierr);
792     /* compute Qd = Q*d */
793     if (it < it2){
794       for (i = 0; i < it; i++){
795         tempQ = Q[ipt[i]];
796         for (j = 0; j < dim; j++)
797           Qd[j] += (tempQ[j] * d[ipt[i]]);
798       }
799     }
800     else { /* compute Qd = Q*y-t */
801       for (i = 0; i < it2; i++){
802         tempQ = Q[ipt2[i]];
803         for (j = 0; j < dim; j++)
804           Qd[j] += (tempQ[j] * y[ipt2[i]]);
805       }
806       for (j = 0; j < dim; j++)
807         Qd[j] -= t[j];
808     }
809 
810 
811     /* ak = inner{d_{k}}{d_{k}} */
812     ak = 0.0;
813     for (i = 0; i < dim; i++)
814       ak += d[i] * d[i];
815 
816     bk = 0.0;
817     for (i = 0; i < dim; i++)
818       bk += d[i]*Qd[i];
819 
820     if (bk > EPS*ak && gd < 0.0)    /* ak is normd */
821       lamnew = -gd/bk;
822     else
823       lamnew = 1.0;
824 
825     /* fv is computing f(x_{k} + d_{k}) */
826     fv = 0.0;
827     for (i = 0; i < dim; i++){
828       xplus[i] = x[i] + d[i];
829       tplus[i] = t[i] + Qd[i];
830       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
831     }
832 
833     /* fr is fref */
834     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
835       lscount++;
836       fv = 0.0;
837       for (i = 0; i < dim; i++){
838         xplus[i] = x[i] + lamnew*d[i];
839         tplus[i] = t[i] + lamnew*Qd[i];
840         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
841       }
842     }
843 
844     for (i = 0; i < dim; i++){
845       sk[i] = xplus[i] - x[i];
846       yk[i] = tplus[i] - t[i];
847       x[i]  = xplus[i];
848       t[i]  = tplus[i];
849       g[i]  = t[i] + f[i];
850     }
851 
852 
853     /* update the line search control parameters */
854 
855     if (fv < fbest){
856       fbest = fv;
857       fc    = fv;
858       llast = 0;
859     }
860     else {
861       fc = (fc > fv ? fc : fv);
862       llast++;
863       if (llast == L){
864         fr    = fc;
865         fc    = fv;
866         llast = 0;
867       }
868     }
869 
870     ak = bk = 0.0;
871     for (i = 0; i < dim; i++){
872       ak += sk[i] * sk[i];
873       bk += sk[i] * yk[i];
874     }
875 
876     if (bk <= EPS*ak)
877       alpha = ALPHA_MAX;
878     else{
879       if (bkold < EPS*akold)
880         alpha = ak/bk;
881       else
882         alpha = (akold+ak)/(bkold+bk);
883 
884       if (alpha > ALPHA_MAX)
885         alpha = ALPHA_MAX;
886       else if (alpha < ALPHA_MIN)
887         alpha = ALPHA_MIN;
888     }
889 
890     akold = ak;
891     bkold = bk;
892 
893 
894     /*** stopping criterion based on KKT conditions ***/
895     /* at optimal, gradient of lagrangian w.r.t. x is zero */
896 
897     bk = 0.0;
898     for (i = 0; i < dim; i++)
899       bk +=  x[i] * x[i];
900 
901     if (sqrt(ak) < tol*10 * sqrt(bk)){
902       it     = 0;
903       luv    = 0;
904       kktlam = 0.0;
905       for (i = 0; i < dim; i++){
906         /* x[i] is active hence lagrange multipliers for box constraints
907                 are zero. The lagrange multiplier for ineq. const. is then
908                 defined as below
909         */
910         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
911           ipt[it++] = i;
912           kktlam    = kktlam - a[i]*g[i];
913         }
914         else
915           uv[luv++] = i;
916       }
917 
918       if (it == 0 && sqrt(ak) < tol*0.5 * sqrt(bk))
919         return 0;
920       else {
921         kktlam = kktlam/it;
922         info   = 1;
923         for (i = 0; i < it; i++) {
924           if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
925             info = 0;
926             break;
927           }
928         }
929         if (info == 1)  {
930           for (i = 0; i < luv; i++)  {
931             if (x[uv[i]] <= DELTAsv){
932               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
933                      not be zero. So, the gradient without beta is > 0
934               */
935               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
936                 info = 0;
937                 break;
938               }
939             }
940             else {
941               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
942                      not be zero. So, the gradient without eta is < 0
943               */
944               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
945                 info = 0;
946                 break;
947               }
948             }
949           }
950         }
951 
952         if (info == 1)
953           return 0;
954       }
955     }
956   }
957   return 0;
958 }
959 
960 
961