xref: /petsc/src/tao/unconstrained/impls/bmrm/bmrm.c (revision 3c9e27cfca911a7d7e3219758be42726e83c4ab2)
1 #include "bmrm.h"
2 
3 static PetscErrorCode init_df_solver(TAO_DF*);
4 static PetscErrorCode ensure_df_space(PetscInt, TAO_DF*);
5 static PetscErrorCode destroy_df_solver(TAO_DF*);
6 static PetscReal phi(PetscReal*,PetscInt,PetscReal,PetscReal*,
7 		     PetscReal,PetscReal*,PetscReal*,PetscReal*);
8 static PetscInt project(PetscInt,PetscReal*,PetscReal,PetscReal*,
9 			PetscReal*,PetscReal*,PetscReal*,PetscReal*,TAO_DF*);
10 
11 static PetscErrorCode solve(TAO_DF*);
12 
13 
14 /*------------------------------------------------------------*/
15 /* The main solver function
16 
17    f = Remp(W)		This is what the user provides us from the application layer
18    So the ComputeGradient function for instance should get us back the subgradient of Remp(W)
19 
20    Regularizer assumed to be L2 norm = lambda*0.5*W'W ()
21 */
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "make_grad_node"
25 static PetscErrorCode make_grad_node(Vec X, Vec_Chain **p)
26 {
27   PetscErrorCode ierr;
28 
29   PetscFunctionBegin;
30 
31   ierr = PetscMalloc(sizeof(Vec_Chain), p); CHKERRQ(ierr);
32   ierr = VecDuplicate(X, &(*p)->V); CHKERRQ(ierr);
33   ierr = VecCopy(X, (*p)->V); CHKERRQ(ierr);
34   (*p)->next = PETSC_NULL;
35 
36   PetscFunctionReturn(0);
37 }
38 
39 
40 #undef __FUNCT__
41 #define __FUNCT__ "destroy_grad_list"
42 static PetscErrorCode destroy_grad_list(Vec_Chain *head)
43 {
44   PetscErrorCode ierr;
45   Vec_Chain *p = head->next, *q;
46 
47   PetscFunctionBegin;
48   while(p) {
49     q = p->next;
50     ierr = VecDestroy(&p->V); CHKERRQ(ierr);
51     ierr = PetscFree(p); CHKERRQ(ierr);
52     p = q;
53   }
54   head->next = PETSC_NULL;
55 
56   PetscFunctionReturn(0);
57 }
58 
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "TaoSolve_BMRM"
62 static PetscErrorCode TaoSolve_BMRM(TaoSolver tao)
63 {
64   PetscErrorCode ierr;
65   TaoSolverTerminationReason reason;
66   TAO_DF df;
67   TAO_BMRM *bmrm = (TAO_BMRM*)tao->data;
68 
69   /* Values and pointers to parts of the optimization problem */
70   PetscReal f = 0.0;
71   Vec W = tao->solution;
72   Vec G = tao->gradient;
73   PetscReal lambda;
74 
75   PetscReal bt;
76   Vec_Chain grad_list, *tail_glist, *pgrad;
77 
78   PetscInt iter = 0;
79   PetscInt i;
80   PetscMPIInt rank;
81 
82   /* Used in termination criteria check */
83   PetscReal reg;
84   PetscReal jtwt, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
85   PetscReal innerSolverTol;
86 
87   PetscFunctionBegin;
88 
89   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank); CHKERRQ(ierr);
90   lambda = bmrm->lambda;
91 
92   /* Check Stopping Condition */
93   tao->step = 1.0;
94   max_jtwt = -BMRM_INFTY;
95   min_jw = BMRM_INFTY;
96   innerSolverTol = 1.0;
97   epsilon = 0.0;
98 
99   if (rank == 0) {
100     ierr = init_df_solver(&df); CHKERRQ(ierr);
101     grad_list.next = NULL;
102     tail_glist = &grad_list;
103   }
104 
105   df.tol = 1e-6;
106   reason = TAO_CONTINUE_ITERATING;
107 
108   /*-----------------Algorithm Begins------------------------*/
109   /* make the scatter */
110   ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w); CHKERRQ(ierr);
111   ierr = VecAssemblyBegin(bmrm->local_w); CHKERRQ(ierr);
112   ierr = VecAssemblyEnd(bmrm->local_w); CHKERRQ(ierr);
113 
114 	/* NOTE: In application pass the sub-gradient of Remp(W) */
115   ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G); CHKERRQ(ierr);
116   ierr = TaoMonitor(tao,iter,f,1.0,0.0,tao->step,&reason); CHKERRQ(ierr);
117   while (reason == TAO_CONTINUE_ITERATING) {
118     /* compute bt = Remp(Wt-1) - <Wt-1, At> */
119     ierr = VecDot(W, G, &bt); CHKERRQ(ierr);
120     bt = f - bt;
121 
122     /* First gather the gradient to the master node */
123     ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr);
124     ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr);
125 
126     /* Bring up the inner solver */
127     if (rank == 0) {
128       ierr = ensure_df_space(iter+1, &df);  CHKERRQ(ierr);
129       ierr = make_grad_node(bmrm->local_w, &pgrad); CHKERRQ(ierr);
130       tail_glist->next = pgrad;
131       tail_glist = pgrad;
132 
133       df.a[iter] = 1.0;
134       df.f[iter] = -bt;
135       df.u[iter] = 1.0;
136       df.l[iter] = 0.0;
137 
138       /* set up the Q */
139       pgrad = grad_list.next;
140       for (i=0; i<=iter; i++) {
141         ierr = VecDot(pgrad->V, bmrm->local_w, &reg); CHKERRQ(ierr);
142         df.Q[i][iter] = df.Q[iter][i] = reg / lambda;
143         pgrad = pgrad->next;
144       }
145 
146       if (iter > 0) {
147         df.x[iter] = 0.0;
148         ierr = solve(&df);  CHKERRQ(ierr);
149       }
150       else
151 	df.x[0] = 1.0;
152 
153       /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
154       jtwt = 0.0;
155       ierr = VecSet(bmrm->local_w, 0.0);  CHKERRQ(ierr);
156       pgrad = grad_list.next;
157       for (i=0; i<=iter; i++) {
158         jtwt -= df.x[i] * df.f[i];
159         ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V);  CHKERRQ(ierr);
160         pgrad = pgrad->next;
161       }
162 
163       ierr = VecNorm(bmrm->local_w, NORM_2, &reg);  CHKERRQ(ierr);
164       reg = 0.5*lambda*reg*reg;
165       jtwt -= reg;
166     } /* end if rank == 0 */
167 
168     /* scatter the new W to all nodes */
169     ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE); CHKERRQ(ierr);
170     ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE); CHKERRQ(ierr);
171 
172     ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G); CHKERRQ(ierr);
173 
174     MPI_Bcast(&jtwt,1,MPI_DOUBLE,0,PETSC_COMM_WORLD);
175     MPI_Bcast(&reg,1,MPI_DOUBLE,0,PETSC_COMM_WORLD);
176 
177     jw = reg + f;					/* J(w) = regularizer + Remp(w) */
178     if (jw < min_jw)
179       min_jw = jw;
180     if (jtwt > max_jtwt)
181       max_jtwt = jtwt;
182 
183     pre_epsilon = epsilon;
184     epsilon = min_jw - jtwt;
185 
186     if (rank == 0) {
187       if (innerSolverTol > epsilon)
188         innerSolverTol = epsilon;
189       else if (innerSolverTol < 1e-7)
190         innerSolverTol = 1e-7;
191 
192       /* if the annealing doesn't work well, lower the inner solver tolerance */
193       if(pre_epsilon < epsilon)
194         innerSolverTol *= 0.2;
195 
196       df.tol = innerSolverTol*0.5;
197     }
198 
199     iter++;
200     ierr = TaoMonitor(tao,iter,min_jw,epsilon,0.0,tao->step,&reason); CHKERRQ(ierr);
201   }
202 
203   /* free all the memory */
204   if (rank == 0) {
205     ierr = destroy_grad_list(&grad_list); 	CHKERRQ(ierr);
206     ierr = destroy_df_solver(&df); CHKERRQ(ierr);
207   }
208 
209   ierr = VecDestroy(&bmrm->local_w); CHKERRQ(ierr);
210   ierr = VecScatterDestroy(&bmrm->scatter); CHKERRQ(ierr);
211 
212   PetscFunctionReturn(0);
213 }
214 
215 
216 /* ---------------------------------------------------------- */
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "TaoSetup_BMRM"
220 static PetscErrorCode TaoSetup_BMRM(TaoSolver tao) {
221 
222   PetscErrorCode      ierr;
223 
224   PetscFunctionBegin;
225 
226   /* Allocate some arrays */
227   if (!tao->gradient) {
228     ierr = VecDuplicate(tao->solution, &tao->gradient);    CHKERRQ(ierr);
229   }
230 
231   PetscFunctionReturn(0);
232 }
233 
234 
235 /*------------------------------------------------------------*/
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "TaoDestroy_BMRM"
239 static PetscErrorCode TaoDestroy_BMRM(TaoSolver tao)
240 {
241   PetscErrorCode      ierr;
242 
243   /* Free allocated memory in custom BMRM structure */
244   PetscFunctionBegin;
245   ierr = PetscFree(tao->data); CHKERRQ(ierr);
246   tao->data = PETSC_NULL;
247 
248   PetscFunctionReturn(0);
249 }
250 
251 
252 #undef __FUNCT__
253 #define __FUNCT__ "TaoSetFromOptions_BMRM"
254 static PetscErrorCode TaoSetFromOptions_BMRM(TaoSolver tao)
255 {
256   PetscErrorCode      ierr;
257   TAO_BMRM*           bmrm = (TAO_BMRM*)tao->data;
258   PetscBool           flg;
259 
260   PetscFunctionBegin;
261   ierr = PetscOptionsHead("BMRM for regularized risk minimization");CHKERRQ(ierr);
262   ierr = PetscOptionsReal("-tao_bmrm_lambda", "regulariser weight","", 100,&bmrm->lambda,&flg);  CHKERRQ(ierr);
263   ierr = PetscOptionsTail();CHKERRQ(ierr);
264   PetscFunctionReturn(0);
265 }
266 
267 
268 /*------------------------------------------------------------*/
269 #undef __FUNCT__
270 #define __FUNCT__ "TaoView_BMRM"
271 static PetscErrorCode TaoView_BMRM(TaoSolver tao, PetscViewer viewer)
272 {
273   PetscBool           isascii;
274   PetscErrorCode      ierr;
275 
276   PetscFunctionBegin;
277   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
278   if (isascii) {
279     ierr = PetscViewerASCIIPushTab(viewer); CHKERRQ(ierr);
280     ierr = PetscViewerASCIIPopTab(viewer); CHKERRQ(ierr);
281   }
282   else{
283     SETERRQ1(((PetscObject)tao)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for TAO BMRM",((PetscObject)viewer)->type_name);
284   }
285 
286   PetscFunctionReturn(0);
287 }
288 
289 
290 /*------------------------------------------------------------*/
291 EXTERN_C_BEGIN
292 #undef __FUNCT__
293 #define __FUNCT__ "TaoCreate_BMRM"
294 PetscErrorCode TaoCreate_BMRM(TaoSolver tao)
295 {
296   TAO_BMRM *bmrm;
297   PetscErrorCode      ierr;
298 
299   PetscFunctionBegin;
300   tao->ops->setup = TaoSetup_BMRM;
301   tao->ops->solve = TaoSolve_BMRM;
302   tao->ops->view  = TaoView_BMRM;
303   tao->ops->setfromoptions = TaoSetFromOptions_BMRM;
304   tao->ops->destroy = TaoDestroy_BMRM;
305 
306   ierr = PetscNewLog(tao,&bmrm); CHKERRQ(ierr);
307   bmrm->lambda = 1.0;
308   tao->data = (void*)bmrm;
309 
310   /* Note: May need to be tuned! */
311   tao->max_it = 2048;
312   tao->max_funcs = 300000;
313   tao->fatol = 1e-20;
314   tao->frtol = 1e-25;
315   tao->gatol = 1e-25;
316   tao->grtol = 1e-25;
317 
318   PetscFunctionReturn(0);
319 }
320 EXTERN_C_END
321 
322 
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "init_df_solver"
326 PetscErrorCode init_df_solver(TAO_DF *df)
327 {
328   PetscInt i, n = INCRE_DIM;
329   PetscErrorCode ierr;
330 
331   PetscFunctionBegin;
332 
333   /* default values */
334   df->maxProjIter = 200;
335   df->maxPGMIter = 300000;
336   df->b = 1.0;
337 
338   /* memory space required by Dai-Fletcher */
339   df->cur_num_cp = n;
340   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->f);  CHKERRQ(ierr);
341   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->a);  CHKERRQ(ierr);
342   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->l);  CHKERRQ(ierr);
343   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->u);  CHKERRQ(ierr);
344   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->x);  CHKERRQ(ierr);
345   ierr = PetscMalloc(sizeof(PetscReal*)*n, &df->Q);  CHKERRQ(ierr);
346 
347   for (i = 0; i < n; i ++) {
348     ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Q[i]);  CHKERRQ(ierr);
349   }
350 
351   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->g);  CHKERRQ(ierr);
352   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->y);  CHKERRQ(ierr);
353   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tempv);  CHKERRQ(ierr);
354   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->d);  CHKERRQ(ierr);
355   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Qd);  CHKERRQ(ierr);
356   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->t);  CHKERRQ(ierr);
357   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->xplus);  CHKERRQ(ierr);
358   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tplus);  CHKERRQ(ierr);
359   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->sk);  CHKERRQ(ierr);
360   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->yk);  CHKERRQ(ierr);
361 
362   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt);  CHKERRQ(ierr);
363   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt2);  CHKERRQ(ierr);
364   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->uv);  CHKERRQ(ierr);
365 
366   PetscFunctionReturn(0);
367 }
368 
369 
370 #undef __FUNCT__
371 #define __FUNCT__ "ensure_df_space"
372 PetscErrorCode ensure_df_space(PetscInt dim, TAO_DF *df)
373 {
374   PetscErrorCode ierr;
375   PetscReal *tmp, **tmp_Q;
376   PetscInt i, n, old_n;
377 
378   df->dim = dim;
379   if (dim <= df->cur_num_cp)
380     return 0;
381 
382   PetscFunctionBegin;
383 
384   old_n = df->cur_num_cp;
385   df->cur_num_cp += INCRE_DIM;
386   n = df->cur_num_cp;
387 
388   /* memory space required by dai-fletcher */
389   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp);  CHKERRQ(ierr);
390   ierr = PetscMemcpy(tmp, df->f, sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
391   ierr = PetscFree(df->f);  CHKERRQ(ierr);
392   df->f = tmp;
393 
394   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp);  CHKERRQ(ierr);
395   ierr = PetscMemcpy(tmp, df->a, sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
396   ierr = PetscFree(df->a);  CHKERRQ(ierr);
397   df->a = tmp;
398 
399   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp);  CHKERRQ(ierr);
400   ierr = PetscMemcpy(tmp, df->l, sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
401   ierr = PetscFree(df->l);  CHKERRQ(ierr);
402   df->l = tmp;
403 
404   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp);  CHKERRQ(ierr);
405   ierr = PetscMemcpy(tmp, df->u, sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
406   ierr = PetscFree(df->u);  CHKERRQ(ierr);
407   df->u = tmp;
408 
409   ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp);  CHKERRQ(ierr);
410   ierr = PetscMemcpy(tmp, df->x, sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
411   ierr = PetscFree(df->x);  CHKERRQ(ierr);
412   df->x = tmp;
413 
414   ierr = PetscMalloc(sizeof(PetscReal*)*n, &tmp_Q);  CHKERRQ(ierr);
415   for (i = 0; i < n; i ++)
416   {
417     ierr = PetscMalloc(sizeof(PetscReal)*n, &tmp_Q[i]);  CHKERRQ(ierr);
418     if (i < old_n)
419     {
420       ierr = PetscMemcpy(tmp_Q[i], df->Q[i], sizeof(PetscReal)*old_n);  CHKERRQ(ierr);
421       ierr = PetscFree(df->Q[i]);  CHKERRQ(ierr);
422     }
423   }
424 
425   ierr = PetscFree(df->Q);  CHKERRQ(ierr);
426   df->Q = tmp_Q;
427 
428   ierr = PetscFree(df->g);  CHKERRQ(ierr);
429   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->g);  CHKERRQ(ierr);
430 
431   ierr = PetscFree(df->y);  CHKERRQ(ierr);
432   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->y);  CHKERRQ(ierr);
433 
434   ierr = PetscFree(df->tempv);  CHKERRQ(ierr);
435   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tempv);  CHKERRQ(ierr);
436 
437   ierr = PetscFree(df->d);  CHKERRQ(ierr);
438   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->d);  CHKERRQ(ierr);
439 
440   ierr = PetscFree(df->Qd);  CHKERRQ(ierr);
441   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->Qd);  CHKERRQ(ierr);
442 
443   ierr = PetscFree(df->t);  CHKERRQ(ierr);
444   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->t);  CHKERRQ(ierr);
445 
446   ierr = PetscFree(df->xplus);  CHKERRQ(ierr);
447   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->xplus);  CHKERRQ(ierr);
448 
449   ierr = PetscFree(df->tplus);  CHKERRQ(ierr);
450   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->tplus);  CHKERRQ(ierr);
451 
452   ierr = PetscFree(df->sk);  CHKERRQ(ierr);
453   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->sk);  CHKERRQ(ierr);
454 
455   ierr = PetscFree(df->yk);  CHKERRQ(ierr);
456   ierr = PetscMalloc(sizeof(PetscReal)*n, &df->yk);  CHKERRQ(ierr);
457 
458   ierr = PetscFree(df->ipt);  CHKERRQ(ierr);
459   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt);  CHKERRQ(ierr);
460 
461   ierr = PetscFree(df->ipt2);  CHKERRQ(ierr);
462   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->ipt2);  CHKERRQ(ierr);
463 
464   ierr = PetscFree(df->uv);  CHKERRQ(ierr);
465   ierr = PetscMalloc(sizeof(PetscInt)*n, &df->uv);  CHKERRQ(ierr);
466 
467   PetscFunctionReturn(0);
468 }
469 
470 
471 #undef __FUNCT__
472 #define __FUNCT__ "destroy_df_solver"
473 PetscErrorCode destroy_df_solver(TAO_DF *df)
474 {
475   PetscErrorCode ierr;
476   PetscInt i;
477   PetscFunctionBegin;
478 
479   if (df->f)   {
480     ierr = PetscFree(df->f);  CHKERRQ(ierr);    df->f = PETSC_NULL;
481   }
482 
483   if (df->a)   {
484     ierr = PetscFree(df->a);  CHKERRQ(ierr);    df->a = PETSC_NULL;
485   }
486 
487   if (df->l)   {
488     ierr = PetscFree(df->l);  CHKERRQ(ierr);    df->l = PETSC_NULL;
489   }
490 
491   if (df->u)   {
492     ierr = PetscFree(df->u);  CHKERRQ(ierr);    df->u = PETSC_NULL;
493   }
494 
495   if (df->x)   {
496     ierr = PetscFree(df->x);  CHKERRQ(ierr);    df->x = PETSC_NULL;
497   }
498 
499   for (i = 0; i < df->cur_num_cp; i ++)
500   {
501     ierr = PetscFree(df->Q[i]);  CHKERRQ(ierr);
502   }
503   ierr = PetscFree(df->Q);  CHKERRQ(ierr);
504 
505 
506   if (df->ipt)   {
507     ierr = PetscFree(df->ipt);  CHKERRQ(ierr);    df->ipt = PETSC_NULL;
508   }
509   if (df->ipt2)   {
510     ierr = PetscFree(df->ipt2);  CHKERRQ(ierr);    df->ipt2 = PETSC_NULL;
511   }
512   if (df->uv)   {
513     ierr = PetscFree(df->uv);  CHKERRQ(ierr);    df->uv = PETSC_NULL;
514   }
515   if (df->g)   {
516     ierr = PetscFree(df->g);  CHKERRQ(ierr);    df->g = PETSC_NULL;
517   }
518   if (df->y)   {
519     ierr = PetscFree(df->y);  CHKERRQ(ierr);    df->y = PETSC_NULL;
520   }
521   if (df->tempv)   {
522     ierr = PetscFree(df->tempv);  CHKERRQ(ierr);    df->tempv = PETSC_NULL;
523   }
524   if (df->d)   {
525     ierr = PetscFree(df->d);  CHKERRQ(ierr);    df->d = PETSC_NULL;
526   }
527   if (df->Qd)   {
528     ierr = PetscFree(df->Qd);  CHKERRQ(ierr);    df->Qd = PETSC_NULL;
529   }
530   if (df->t)   {
531     ierr = PetscFree(df->t);  CHKERRQ(ierr);    df->t = PETSC_NULL;
532   }
533   if (df->xplus)   {
534     ierr = PetscFree(df->xplus);  CHKERRQ(ierr);    df->xplus = PETSC_NULL;
535   }
536   if (df->tplus)   {
537     ierr = PetscFree(df->tplus);  CHKERRQ(ierr);    df->tplus = PETSC_NULL;
538   }
539   if (df->sk)   {
540     ierr = PetscFree(df->sk);  CHKERRQ(ierr);    df->sk = PETSC_NULL;
541   }
542   if (df->yk)   {
543     ierr = PetscFree(df->yk);  CHKERRQ(ierr);    df->yk = PETSC_NULL;
544   }
545 
546   PetscFunctionReturn(0);
547 }
548 
549 
550 /* Piecewise linear monotone target function for the Dai-Fletcher projector */
551 #undef __FUNCT__
552 #define __FUNCT__ "phi"
553 PetscReal phi(PetscReal *x,
554            PetscInt n,
555            PetscReal lambda,
556            PetscReal *a,
557            PetscReal b,
558            PetscReal *c,
559            PetscReal *l,
560            PetscReal *u)
561 {
562   PetscReal r = 0.0;
563   PetscInt i;
564 
565   for (i = 0; i < n; i++){
566     x[i] = -c[i] + lambda*a[i];
567     if (x[i] > u[i])
568       x[i] = u[i];
569     else if(x[i] < l[i])
570       x[i] = l[i];
571     r += a[i]*x[i];
572   }
573   return r - b;
574 }
575 
576 
577 
578 /** Modified Dai-Fletcher QP projector solves the problem:
579  *
580  *      minimise  0.5*x'*x - c'*x
581  *      subj to   a'*x = b
582  *                l \leq x \leq u
583  *
584  *  \param c The point to be projected onto feasible set
585  */
586 #undef __FUNCT__
587 #define __FUNCT__ "project"
588 PetscInt project(PetscInt n,
589             PetscReal *a,
590             PetscReal b,
591             PetscReal *c,
592             PetscReal *l,
593             PetscReal *u,
594             PetscReal *x,
595             PetscReal *lam_ext,
596             TAO_DF *df)
597 {
598   PetscReal lambda, lambdal, lambdau, dlambda, lambda_new;
599   PetscReal r, rl, ru, s;
600   PetscInt    innerIter;
601   PetscBool nonNegativeSlack = PETSC_FALSE;
602 /*   PetscBool nonNegativeSlack = PETSC_TRUE; */
603 
604   *lam_ext = 0;
605   lambda  = 0;
606   dlambda = 0.5;
607   innerIter = 1;
608 
609   /*  \phi(x;lambda) := 0.5*x'*x + c'*x - lambda*(a'*x-b)
610    *
611    *  Optimality conditions for \phi:
612    *
613    *  1. lambda   <= 0
614    *  2. r        <= 0
615    *  3. r*lambda == 0
616    */
617 
618   /* Bracketing Phase */
619   r = phi(x, n, lambda, a, b, c, l, u);
620 
621   if(nonNegativeSlack)
622   {
623     /* inequality constraint, i.e., with \xi >= 0 constraint */
624     if (r < TOL_R)
625       return 0;
626   }
627   else
628   {
629     /* equality constraint ,i.e., without \xi >= 0 constraint */
630     if (fabs(r) < TOL_R)
631       return 0;
632   }
633 
634   if (r < 0.0){
635     lambdal = lambda;
636     rl      = r;
637     lambda  = lambda + dlambda;
638     r       = phi(x, n, lambda, a, b, c, l, u);
639     while (r < 0.0 && dlambda < BMRM_INFTY)  {
640       lambdal = lambda;
641       s       = rl/r - 1.0;
642       if (s < 0.1) s = 0.1;
643       dlambda = dlambda + dlambda/s;
644       lambda  = lambda + dlambda;
645       rl      = r;
646       r       = phi(x, n, lambda, a, b, c, l, u);
647     }
648     lambdau = lambda;
649     ru      = r;
650   }
651   else {
652     lambdau = lambda;
653     ru      = r;
654     lambda  = lambda - dlambda;
655     r       = phi(x, n, lambda, a, b, c, l, u);
656     while (r > 0.0 && dlambda > -BMRM_INFTY) {
657       lambdau = lambda;
658       s       = ru/r - 1.0;
659       if (s < 0.1) s = 0.1;
660       dlambda = dlambda + dlambda/s;
661       lambda  = lambda - dlambda;
662       ru      = r;
663       r       = phi(x, n, lambda, a, b, c, l, u);
664     }
665     lambdal = lambda;
666     rl      = r;
667   }
668 
669   if(fabs(dlambda) > BMRM_INFTY) {
670     PetscPrintf(PETSC_COMM_SELF, "ERROR: L2N2_DaiFletcherPGM detected Infeasible QP problem!\n");
671     exit(0);
672   }
673 
674   if(ru == 0){
675     lambda = lambdau;
676     r = phi(x, n, lambda, a, b, c, l, u);
677     return innerIter;
678   }
679 
680   /* Secant Phase */
681   s       = 1.0 - rl/ru;
682   dlambda = dlambda/s;
683   lambda  = lambdau - dlambda;
684   r       = phi(x, n, lambda, a, b, c, l, u);
685 
686   while (fabs(r) > TOL_R
687          && dlambda > TOL_LAM * (1.0 + fabs(lambda))
688          && innerIter < df->maxProjIter){
689     innerIter++;
690     if (r > 0.0){
691       if (s <= 2.0){
692         lambdau = lambda;
693         ru      = r;
694         s       = 1.0 - rl/ru;
695         dlambda = (lambdau - lambdal) / s;
696         lambda  = lambdau - dlambda;
697       }
698       else {
699         s          = ru/r-1.0;
700         if (s < 0.1) s = 0.1;
701         dlambda    = (lambdau - lambda) / s;
702         lambda_new = 0.75*lambdal + 0.25*lambda;
703         if (lambda_new < (lambda - dlambda))
704           lambda_new = lambda - dlambda;
705         lambdau    = lambda;
706         ru         = r;
707         lambda     = lambda_new;
708         s          = (lambdau - lambdal) / (lambdau - lambda);
709       }
710     }
711     else {
712       if (s >= 2.0){
713         lambdal = lambda;
714         rl      = r;
715         s       = 1.0 - rl/ru;
716         dlambda = (lambdau - lambdal) / s;
717         lambda  = lambdau - dlambda;
718       }
719       else {
720         s          = rl/r - 1.0;
721         if (s < 0.1) s = 0.1;
722         dlambda    = (lambda-lambdal) / s;
723         lambda_new = 0.75*lambdau + 0.25*lambda;
724         if (lambda_new > (lambda + dlambda))
725           lambda_new = lambda + dlambda;
726         lambdal    = lambda;
727         rl         = r;
728         lambda     = lambda_new;
729         s          = (lambdau - lambdal) / (lambdau-lambda);
730       }
731     }
732     r = phi(x, n, lambda, a, b, c, l, u);
733   }
734 
735   *lam_ext = lambda;
736   if(innerIter >= df->maxProjIter)
737     PetscPrintf(PETSC_COMM_SELF, "WARNING: DaiFletcher max iterations\n");
738 
739   return innerIter;
740 }
741 
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "solve"
745 PetscErrorCode solve(TAO_DF *df)
746 {
747   PetscErrorCode ierr;
748   PetscInt    i, j, innerIter, it, it2, luv, info, lscount = 0, projcount = 0;
749   PetscReal gd, max, ak, bk, akold, bkold, lamnew, alpha, kktlam=0.0, lam_ext;
750   PetscReal DELTAsv, ProdDELTAsv;
751   PetscReal c, *tempQ;
752   PetscReal *x = df->x, *a = df->a, b = df->b, *l = df->l, *u = df->u, tol = df->tol;
753   PetscReal *tempv = df->tempv, *y = df->y, *g = df->g, *d = df->d, *Qd = df->Qd;
754   PetscReal *xplus = df->xplus, *tplus = df->tplus, *sk = df->sk, *yk = df->yk;
755   PetscReal **Q = df->Q, *f = df->f, *t = df->t;
756   PetscInt dim = df->dim, *ipt = df->ipt, *ipt2 = df->ipt2, *uv = df->uv;
757 
758   /*** variables for the adaptive nonmonotone linesearch ***/
759   PetscInt    L, llast;
760   PetscReal fr, fbest, fv, fc, fv0;
761   c = BMRM_INFTY;
762 
763   DELTAsv = EPS_SV;
764   if (tol <= 1.0e-5 || dim <= 20)
765     ProdDELTAsv = 0.0F;
766   else
767     ProdDELTAsv = EPS_SV;
768 
769   for (i = 0; i < dim; i++)
770     tempv[i] = -x[i];
771 
772   lam_ext = 0.0;
773 
774   /* Project the initial solution */
775   projcount += project(dim, a, b, tempv, l, u, x, &lam_ext, df);
776 
777   /* Compute gradient
778      g = Q*x + f; */
779 
780   it = 0;
781   for (i = 0; i < dim; i++)
782     if (fabs(x[i]) > ProdDELTAsv)
783       ipt[it++] = i;
784 
785   ierr = PetscMemzero(t, dim*sizeof(PetscReal));  CHKERRQ(ierr);
786   for (i = 0; i < it; i++){
787     tempQ = Q[ipt[i]];
788     for (j = 0; j < dim; j++)
789       t[j] += (tempQ[j]*x[ipt[i]]);
790   }
791   for (i = 0; i < dim; i++){
792     g[i] = t[i] + f[i];
793   }
794 
795 
796   /* y = -(x_{k} - g_{k}) */
797   for (i = 0; i < dim; i++){
798     y[i] = g[i] - x[i];
799   }
800 
801   /* Project x_{k} - g_{k} */
802   projcount += project(dim, a, b, y, l, u, tempv, &lam_ext, df);
803 
804   /* y = P(x_{k} - g_{k}) - x_{k} */
805   max = ALPHA_MIN;
806   for (i = 0; i < dim; i++){
807     y[i] = tempv[i] - x[i];
808     if (fabs(y[i]) > max)
809       max = fabs(y[i]);
810   }
811 
812   if (max < tol*1e-3){
813     lscount = 0;
814     innerIter    = 0;
815     return 0;
816   }
817 
818   alpha = 1.0 / max;
819 
820   /* fv0 = f(x_{0}). Recall t = Q x_{k}  */
821   fv0   = 0.0;
822   for (i = 0; i < dim; i++)
823     fv0 += x[i] * (0.5*t[i] + f[i]);
824 
825   /*** adaptive nonmonotone linesearch ***/
826   L     = 2;
827   fr    = ALPHA_MAX;
828   fbest = fv0;
829   fc    = fv0;
830   llast = 0;
831   akold = bkold = 0.0;
832 
833   /***      Iterator begins     ***/
834   for (innerIter = 1; innerIter <= df->maxPGMIter; innerIter++) {
835 
836     /* tempv = -(x_{k} - alpha*g_{k}) */
837     for (i = 0; i < dim; i++)
838       tempv[i] = alpha*g[i] - x[i];
839 
840     /* Project x_{k} - alpha*g_{k} */
841     projcount += project(dim, a, b, tempv, l, u, y, &lam_ext, df);
842 
843 
844     /* gd = \inner{d_{k}}{g_{k}}
845         d = P(x_{k} - alpha*g_{k}) - x_{k}
846     */
847     gd = 0.0;
848     for (i = 0; i < dim; i++){
849       d[i] = y[i] - x[i];
850       gd  += d[i] * g[i];
851     }
852 
853     /* Gradient computation  */
854 
855     /* compute Qd = Q*d  or  Qd = Q*y - t depending on their sparsity */
856 
857     it = it2 = 0;
858     for (i = 0; i < dim; i++)
859       if (fabs(d[i]) > (ProdDELTAsv*1.0e-2))
860         ipt[it++]   = i;
861     for (i = 0; i < dim; i++)
862       if (fabs(y[i]) > ProdDELTAsv)
863         ipt2[it2++] = i;
864 
865     ierr = PetscMemzero(Qd, dim*sizeof(PetscReal));  CHKERRQ(ierr);
866     /* compute Qd = Q*d */
867     if (it < it2){
868       for (i = 0; i < it; i++){
869         tempQ = Q[ipt[i]];
870         for (j = 0; j < dim; j++)
871           Qd[j] += (tempQ[j] * d[ipt[i]]);
872       }
873     }
874     else { /* compute Qd = Q*y-t */
875       for (i = 0; i < it2; i++){
876         tempQ = Q[ipt2[i]];
877         for (j = 0; j < dim; j++)
878           Qd[j] += (tempQ[j] * y[ipt2[i]]);
879       }
880       for (j = 0; j < dim; j++)
881         Qd[j] -= t[j];
882     }
883 
884 
885     /* ak = inner{d_{k}}{d_{k}} */
886     ak = 0.0;
887     for (i = 0; i < dim; i++)
888       ak += d[i] * d[i];
889 
890     bk = 0.0;
891     for (i = 0; i < dim; i++)
892       bk += d[i]*Qd[i];
893 
894     if (bk > EPS*ak && gd < 0.0)    /* ak is normd */
895       lamnew = -gd/bk;
896     else
897       lamnew = 1.0;
898 
899     /* fv is computing f(x_{k} + d_{k}) */
900     fv = 0.0;
901     for (i = 0; i < dim; i++){
902       xplus[i] = x[i] + d[i];
903       tplus[i] = t[i] + Qd[i];
904       fv      += xplus[i] * (0.5*tplus[i] + f[i]);
905     }
906 
907     /* fr is fref */
908     if ((innerIter == 1 && fv >= fv0) || (innerIter > 1 && fv >= fr)){
909       lscount++;
910       fv = 0.0;
911       for (i = 0; i < dim; i++){
912         xplus[i] = x[i] + lamnew*d[i];
913         tplus[i] = t[i] + lamnew*Qd[i];
914         fv      += xplus[i] * (0.5*tplus[i] + f[i]);
915       }
916     }
917 
918     for (i = 0; i < dim; i++){
919       sk[i] = xplus[i] - x[i];
920       yk[i] = tplus[i] - t[i];
921       x[i]  = xplus[i];
922       t[i]  = tplus[i];
923       g[i]  = t[i] + f[i];
924     }
925 
926 
927     /* update the line search control parameters */
928 
929     if (fv < fbest){
930       fbest = fv;
931       fc    = fv;
932       llast = 0;
933     }
934     else {
935       fc = (fc > fv ? fc : fv);
936       llast++;
937       if (llast == L){
938         fr    = fc;
939         fc    = fv;
940         llast = 0;
941       }
942     }
943 
944     ak = bk = 0.0;
945     for (i = 0; i < dim; i++){
946       ak += sk[i] * sk[i];
947       bk += sk[i] * yk[i];
948     }
949 
950     if (bk <= EPS*ak)
951       alpha = ALPHA_MAX;
952     else{
953       if (bkold < EPS*akold)
954         alpha = ak/bk;
955       else
956         alpha = (akold+ak)/(bkold+bk);
957 
958       if (alpha > ALPHA_MAX)
959         alpha = ALPHA_MAX;
960       else if (alpha < ALPHA_MIN)
961         alpha = ALPHA_MIN;
962     }
963 
964     akold = ak;
965     bkold = bk;
966 
967 
968     /*** stopping criterion based on KKT conditions ***/
969     /* at optimal, gradient of lagrangian w.r.t. x is zero */
970 
971     bk = 0.0;
972     for (i = 0; i < dim; i++)
973       bk +=  x[i] * x[i];
974 
975     if (sqrt(ak) < tol*10 * sqrt(bk)){
976       it     = 0;
977       luv    = 0;
978       kktlam = 0.0;
979       for (i = 0; i < dim; i++){
980         /* x[i] is active hence lagrange multipliers for box constraints
981                 are zero. The lagrange multiplier for ineq. const. is then
982                 defined as below
983         */
984         if ((x[i] > DELTAsv) && (x[i] < c-DELTAsv)){
985           ipt[it++] = i;
986           kktlam    = kktlam - a[i]*g[i];
987         }
988         else
989           uv[luv++] = i;
990       }
991 
992       if (it == 0 && sqrt(ak) < tol*0.5 * sqrt(bk))
993         return 0;
994       else {
995         kktlam = kktlam/it;
996         info   = 1;
997         for (i = 0; i < it; i++) {
998           if (fabs(a[ipt[i]] * g[ipt[i]] + kktlam) > tol) {
999             info = 0;
1000             break;
1001           }
1002         }
1003         if (info == 1)  {
1004           for (i = 0; i < luv; i++)  {
1005             if (x[uv[i]] <= DELTAsv){
1006               /* x[i] == lower bound, hence, lagrange multiplier (say, beta) for lower bound may
1007                      not be zero. So, the gradient without beta is > 0
1008               */
1009               if (g[uv[i]] + kktlam*a[uv[i]] < -tol){
1010                 info = 0;
1011                 break;
1012               }
1013             }
1014             else {
1015               /* x[i] == upper bound, hence, lagrange multiplier (say, eta) for upper bound may
1016                      not be zero. So, the gradient without eta is < 0
1017               */
1018               if (g[uv[i]] + kktlam*a[uv[i]] > tol) {
1019                 info = 0;
1020                 break;
1021               }
1022             }
1023           }
1024         }
1025 
1026         if (info == 1)
1027           return 0;
1028       }
1029     }
1030   }
1031   return 0;
1032 }
1033 
1034 
1035