xref: /petsc/src/tao/leastsquares/impls/brgn/brgn.c (revision 8e85b1b3a48c719bb7d116aee5cfc4a5b180d411)
1 #include <../src/tao/leastsquares/impls/brgn/brgn.h>
2 
3 static PetscErrorCode GNHessianProd(Mat H, Vec in, Vec out)
4 {
5   TAO_BRGN              *gn;
6   PetscErrorCode        ierr;
7 
8   PetscFunctionBegin;
9   ierr = MatShellGetContext(H, &gn);CHKERRQ(ierr);
10   ierr = MatMult(gn->subsolver->ls_jac, in, gn->r_work);CHKERRQ(ierr);
11   ierr = MatMultTranspose(gn->subsolver->ls_jac, gn->r_work, out);CHKERRQ(ierr);
12   /* out = out + lambda*D'*(diag.*(D*in)) */
13   ierr = MatMult(gn->D, in, gn->y);CHKERRQ(ierr);  /* y = D*in */
14   ierr = VecPointwiseMult(gn->y_work, gn->diag, gn->y);CHKERRQ(ierr);     /* y_work = diag.*(D*in), where diag = epsilon^2 ./ sqrt(x.^2+epsilon^2).^3 */
15   ierr = MatMultTranspose(gn->D, gn->y_work, gn->x_work);CHKERRQ(ierr);   /* x_work = D'*(diag.*(D*in)) */
16   ierr = VecAXPY(out, gn->lambda, gn->x_work);CHKERRQ(ierr);
17 
18   PetscFunctionReturn(0);
19 }
20 
21 static PetscErrorCode GNObjectiveGradientEval(Tao tao, Vec X, PetscReal *fcn, Vec G, void *ptr)
22 {
23   TAO_BRGN              *gn = (TAO_BRGN *)ptr;
24   PetscInt              K;                    /* dimension of D*X */
25   PetscScalar           yESum;
26   PetscErrorCode        ierr;
27 
28   PetscFunctionBegin;
29     /* compute objective *fcn*/
30   /* compute first term ||ls_res||^2 */
31   ierr = TaoComputeResidual(tao, X, tao->ls_res);CHKERRQ(ierr);
32   ierr = VecDotBegin(tao->ls_res, tao->ls_res, fcn);CHKERRQ(ierr);
33   ierr = VecDotEnd(tao->ls_res, tao->ls_res, fcn);CHKERRQ(ierr);
34   /* add the second term lambda*sum(sqrt(y.^2+epsilon^2) - epsilon), where y = D*x*/
35   ierr = MatMult(gn->D, X, gn->y);CHKERRQ(ierr);  /* y = D*x */
36   ierr = VecPointwiseMult(gn->y_work, gn->y, gn->y);CHKERRQ(ierr);
37   ierr = VecShift(gn->y_work, gn->epsilon*gn->epsilon);CHKERRQ(ierr);
38   ierr = VecSqrtAbs(gn->y_work);CHKERRQ(ierr);      /* gn->y_work = sqrt(y.^2+epsilon^2) */
39   ierr = VecSum(gn->y_work, &yESum);CHKERRQ(ierr);CHKERRQ(ierr);
40   ierr = VecGetSize(gn->y, &K);CHKERRQ(ierr);
41   *fcn = 0.5*(*fcn) + gn->lambda*(yESum - K*gn->epsilon);
42 
43   /* compute gradient G */
44   ierr = TaoComputeResidualJacobian(tao, X, tao->ls_jac, tao->ls_jac_pre);CHKERRQ(ierr);
45   ierr = MatMultTranspose(tao->ls_jac, tao->ls_res, G);CHKERRQ(ierr);
46   /* compute G = G + lambda*D'*(y./sqrt(y.^2+epsilon^2)), where y = D*x */
47   ierr = VecPointwiseDivide(gn->y_work, gn->y, gn->y_work);CHKERRQ(ierr); /* reuse y_work = y./sqrt(y.^2+epsilon^2) */
48   ierr = MatMultTranspose(gn->D, gn->y_work, gn->x_work);CHKERRQ(ierr);
49   ierr = VecAXPY(G, gn->lambda, gn->x_work);CHKERRQ(ierr);
50 
51   PetscFunctionReturn(0);
52 }
53 
54 
55 static PetscErrorCode GNComputeHessian(Tao tao, Vec X, Mat H, Mat Hpre, void *ptr)
56 {
57   TAO_BRGN              *gn = (TAO_BRGN *)ptr;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = TaoComputeResidualJacobian(tao, X, tao->ls_jac, tao->ls_jac_pre);CHKERRQ(ierr);
62 
63   /* calculate and store diagonal matrix as a vector: diag = epsilon^2 ./ sqrt(x.^2+epsilon^2).^3* --> diag = epsilon^2 ./ sqrt(y.^2+epsilon^2).^3, where y = D*x */
64   ierr = MatMult(gn->D, X, gn->y);CHKERRQ(ierr);  /* y = D*x */
65   ierr = VecPointwiseMult(gn->y_work, gn->y, gn->y);CHKERRQ(ierr);
66   ierr = VecShift(gn->y_work, gn->epsilon*gn->epsilon);CHKERRQ(ierr);
67   ierr = VecCopy(gn->y_work, gn->diag);CHKERRQ(ierr);                     /* gn->diag = y.^2+epsilon^2 */
68   ierr = VecSqrtAbs(gn->y_work);CHKERRQ(ierr);                            /* gn->y_work = sqrt(y.^2+epsilon^2) */
69   ierr = VecPointwiseMult(gn->diag, gn->y_work, gn->diag);CHKERRQ(ierr);  /* gn->diag = sqrt(y.^2+epsilon^2).^3 */
70   ierr = VecReciprocal(gn->diag);CHKERRQ(ierr);
71   ierr = VecScale(gn->diag, gn->epsilon*gn->epsilon);CHKERRQ(ierr);
72 
73   PetscFunctionReturn(0);
74 }
75 
76 static PetscErrorCode GNHookFunction(Tao tao, PetscInt iter)
77 {
78   TAO_BRGN              *gn = (TAO_BRGN *)tao->user_update;
79   PetscErrorCode        ierr;
80 
81   PetscFunctionBegin;
82   /* Update basic tao information from the subsolver */
83   gn->parent->nfuncs = tao->nfuncs;
84   gn->parent->ngrads = tao->ngrads;
85   gn->parent->nfuncgrads = tao->nfuncgrads;
86   gn->parent->nhess = tao->nhess;
87   gn->parent->niter = tao->niter;
88   gn->parent->ksp_its = tao->ksp_its;
89   gn->parent->ksp_tot_its = tao->ksp_tot_its;
90   ierr = TaoGetConvergedReason(tao, &gn->parent->reason);CHKERRQ(ierr);
91   /* Update the solution vectors */
92   if (iter == 0) {
93     ierr = VecSet(gn->x_old, 0.0);CHKERRQ(ierr);
94   } else {
95     ierr = VecCopy(tao->solution, gn->x_old);CHKERRQ(ierr);
96     ierr = VecCopy(tao->solution, gn->parent->solution);CHKERRQ(ierr);
97   }
98   /* Update the gradient */
99   ierr = VecCopy(tao->gradient, gn->parent->gradient);CHKERRQ(ierr);
100   /* Call general purpose update function */
101   if (gn->parent->ops->update) {
102     ierr = (*gn->parent->ops->update)(gn->parent, gn->parent->niter);CHKERRQ(ierr);
103   }
104   PetscFunctionReturn(0);
105 }
106 
107 static PetscErrorCode TaoSolve_BRGN(Tao tao)
108 {
109   TAO_BRGN              *gn = (TAO_BRGN *)tao->data;
110   PetscErrorCode        ierr;
111 
112   PetscFunctionBegin;
113   ierr = TaoSolve(gn->subsolver);CHKERRQ(ierr);
114   /* Update basic tao information from the subsolver */
115   tao->nfuncs = gn->subsolver->nfuncs;
116   tao->ngrads = gn->subsolver->ngrads;
117   tao->nfuncgrads = gn->subsolver->nfuncgrads;
118   tao->nhess = gn->subsolver->nhess;
119   tao->niter = gn->subsolver->niter;
120   tao->ksp_its = gn->subsolver->ksp_its;
121   tao->ksp_tot_its = gn->subsolver->ksp_tot_its;
122   ierr = TaoGetConvergedReason(gn->subsolver, &tao->reason);CHKERRQ(ierr);
123   /* Update vectors */
124   ierr = VecCopy(gn->subsolver->solution, tao->solution);CHKERRQ(ierr);
125   ierr = VecCopy(gn->subsolver->gradient, tao->gradient);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 static PetscErrorCode TaoSetFromOptions_BRGN(PetscOptionItems *PetscOptionsObject,Tao tao)
130 {
131   TAO_BRGN              *gn = (TAO_BRGN *)tao->data;
132   PetscErrorCode        ierr;
133 
134   PetscFunctionBegin;
135   /* old Tikhonov regularization code
136   ierr = PetscOptionsHead(PetscOptionsObject,"Gauss-Newton method for least-squares problems using Tikhonov regularization");CHKERRQ(ierr);
137   ierr = PetscOptionsReal("-tao_brgn_lambda", "Tikhonov regularization factor", "", gn->lambda, &gn->lambda, NULL);CHKERRQ(ierr);
138   */
139   ierr = PetscOptionsHead(PetscOptionsObject,"least-squares problems with L1 regularizer: ||f(x)||^2 + lambda*||x||_1. Currently L1-norm is approximated with smooth form");CHKERRQ(ierr);
140   ierr = PetscOptionsReal("-tao_brgn_lambda", "L1-norm regularizer weight", "", gn->lambda, &gn->lambda, NULL);CHKERRQ(ierr);
141   ierr = PetscOptionsReal("-tao_brgn_epsilon", "L1-norm smooth approximation parameter: ||x||_1 = sum(sqrt(x.^2+epsilon^2)-epsilon)", "", gn->epsilon, &gn->epsilon, NULL);CHKERRQ(ierr);
142   ierr = PetscOptionsTail();CHKERRQ(ierr);
143   ierr = TaoSetFromOptions(gn->subsolver);CHKERRQ(ierr);
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode TaoView_BRGN(Tao tao, PetscViewer viewer)
148 {
149   TAO_BRGN              *gn = (TAO_BRGN *)tao->data;
150   PetscErrorCode        ierr;
151 
152   PetscFunctionBegin;
153   ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
154   ierr = TaoView(gn->subsolver, viewer);CHKERRQ(ierr);
155   ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
156   PetscFunctionReturn(0);
157 }
158 
159 static PetscErrorCode TaoSetUp_BRGN(Tao tao)
160 {
161   TAO_BRGN              *gn = (TAO_BRGN *)tao->data;
162   PetscErrorCode        ierr;
163   PetscBool             is_bnls, is_bntr, is_bntl;
164   PetscInt              i, n, N, K; /* dict has size K*N*/
165   /*PetscScalar           v; */ /* XH: hack to set value of matrix */
166 
167   PetscFunctionBegin;
168   if (!tao->ls_res) SETERRQ(PetscObjectComm((PetscObject)tao), PETSC_ERR_ORDER, "TaoSetResidualRoutine() must be called before setup!");
169   ierr = PetscObjectTypeCompare((PetscObject)gn->subsolver, TAOBNLS, &is_bnls);CHKERRQ(ierr);
170   ierr = PetscObjectTypeCompare((PetscObject)gn->subsolver, TAOBNTR, &is_bntr);CHKERRQ(ierr);
171   ierr = PetscObjectTypeCompare((PetscObject)gn->subsolver, TAOBNTL, &is_bntl);CHKERRQ(ierr);
172   if ((is_bnls || is_bntr || is_bntl) && !tao->ls_jac) SETERRQ(PetscObjectComm((PetscObject)tao), PETSC_ERR_ORDER, "TaoSetResidualJacobianRoutine() must be called before setup!");
173   if (!tao->gradient){
174     ierr = VecDuplicate(tao->solution, &tao->gradient);CHKERRQ(ierr);
175   }
176   if (!gn->x_work){
177     ierr = VecDuplicate(tao->solution, &gn->x_work);CHKERRQ(ierr);
178   }
179   if (!gn->r_work){
180     ierr = VecDuplicate(tao->ls_res, &gn->r_work);CHKERRQ(ierr);
181   }
182   if (!gn->x_old) {
183     ierr = VecDuplicate(tao->solution, &gn->x_old);CHKERRQ(ierr);
184     ierr = VecSet(gn->x_old, 0.0);CHKERRQ(ierr);
185   }
186 
187   /*ierr = VecGetSize(tao->solution, &N);CHKERRQ(ierr);*/
188   /* TODO: Safeguard against NULL matrix */
189   /*if (!gn->D)*/
190   ierr = MatGetSize(gn->D, &K, &N);CHKERRQ(ierr); /* Shell matrices still must have sizes defined */
191    /* K = N for identity matrix, K=N-1 or N for gradient matrix */
192   if (!gn->y){
193     ierr = VecCreate(PETSC_COMM_SELF,&gn->y);CHKERRQ(ierr);
194     ierr = VecSetSizes(gn->y,PETSC_DECIDE,K);CHKERRQ(ierr);
195     ierr = VecSetFromOptions(gn->y);CHKERRQ(ierr);
196     ierr = VecSet(gn->y,0.0);CHKERRQ(ierr);
197 
198   }
199   if (!gn->y_work){
200     ierr = VecDuplicate(gn->y,&gn->y_work);CHKERRQ(ierr);
201   }
202   if (!gn->diag){
203     ierr = VecDuplicate(gn->y,&gn->diag);CHKERRQ(ierr);
204     ierr = VecSet(gn->diag,0.0);CHKERRQ(ierr);
205   }
206 
207   /* XH: debug: check matrix */
208   ierr = PetscPrintf(PETSC_COMM_SELF, "-------- Check D matrix: -------- \n"); CHKERRQ(ierr);
209   ierr = MatView(gn->D,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
210 
211 
212   if (!tao->setupcalled) {
213     /* Hessian setup */
214     ierr = VecGetLocalSize(tao->solution, &n);CHKERRQ(ierr);
215     ierr = VecGetSize(tao->solution, &N);CHKERRQ(ierr);
216     ierr = MatSetSizes(gn->H, n, n, N, N);CHKERRQ(ierr);
217     ierr = MatSetType(gn->H, MATSHELL);CHKERRQ(ierr);
218     ierr = MatSetUp(gn->H);CHKERRQ(ierr);
219     ierr = MatShellSetOperation(gn->H, MATOP_MULT, (void (*)(void))GNHessianProd);CHKERRQ(ierr);
220     ierr = MatShellSetContext(gn->H, (void*)gn);CHKERRQ(ierr);
221     /* Subsolver setup, include initial vector and dicttionary D */
222     ierr = TaoSetUpdate(gn->subsolver, GNHookFunction, (void*)gn);CHKERRQ(ierr);
223     ierr = TaoSetInitialVector(gn->subsolver, tao->solution);CHKERRQ(ierr);
224     if (tao->bounded) {
225       ierr = TaoSetVariableBounds(gn->subsolver, tao->XL, tao->XU);CHKERRQ(ierr);
226     }
227     ierr = TaoSetResidualRoutine(gn->subsolver, tao->ls_res, tao->ops->computeresidual, tao->user_lsresP);CHKERRQ(ierr);
228     ierr = TaoSetJacobianResidualRoutine(gn->subsolver, tao->ls_jac, tao->ls_jac, tao->ops->computeresidualjacobian, tao->user_lsjacP);CHKERRQ(ierr);
229     ierr = TaoSetObjectiveAndGradientRoutine(gn->subsolver, GNObjectiveGradientEval, (void*)gn);CHKERRQ(ierr);
230     ierr = TaoSetHessianRoutine(gn->subsolver, gn->H, gn->H, GNComputeHessian, (void*)gn);CHKERRQ(ierr);
231     /* Propagate some options down */
232     ierr = TaoSetTolerances(gn->subsolver, tao->gatol, tao->grtol, tao->gttol);CHKERRQ(ierr);
233     ierr = TaoSetMaximumIterations(gn->subsolver, tao->max_it);CHKERRQ(ierr);
234     ierr = TaoSetMaximumFunctionEvaluations(gn->subsolver, tao->max_funcs);CHKERRQ(ierr);
235     for (i=0; i<tao->numbermonitors; ++i) {
236       ierr = TaoSetMonitor(gn->subsolver, tao->monitor[i], tao->monitorcontext[i], tao->monitordestroy[i]);CHKERRQ(ierr);
237       ierr = PetscObjectReference((PetscObject)(tao->monitorcontext[i]));CHKERRQ(ierr);
238     }
239     ierr = TaoSetUp(gn->subsolver);CHKERRQ(ierr);
240   }
241   PetscFunctionReturn(0);
242 }
243 
244 static PetscErrorCode TaoDestroy_BRGN(Tao tao)
245 {
246   TAO_BRGN              *gn = (TAO_BRGN *)tao->data;
247   PetscErrorCode        ierr;
248 
249   PetscFunctionBegin;
250   if (tao->setupcalled) {
251     ierr = VecDestroy(&tao->gradient);CHKERRQ(ierr);
252     ierr = VecDestroy(&gn->x_work);CHKERRQ(ierr);
253     ierr = VecDestroy(&gn->r_work);CHKERRQ(ierr);
254     ierr = VecDestroy(&gn->x_old);CHKERRQ(ierr);
255     ierr = VecDestroy(&gn->diag);CHKERRQ(ierr);
256     ierr = VecDestroy(&gn->y);CHKERRQ(ierr);
257     ierr = VecDestroy(&gn->y_work);CHKERRQ(ierr);
258   }
259   ierr = MatDestroy(&gn->H);CHKERRQ(ierr);
260   ierr = MatDestroy(&gn->D);CHKERRQ(ierr);
261   ierr = TaoDestroy(&gn->subsolver);CHKERRQ(ierr);
262   gn->parent = NULL;
263   ierr = PetscFree(tao->data);CHKERRQ(ierr);
264   PetscFunctionReturn(0);
265 }
266 
267 /*MC
268   TAOBRGN - Bounded Regularized Gauss-Newton method for solving nonlinear least-squares
269             problems with bound constraints. This algorithm is a thin wrapper around TAOBNTL
270             that constructs the Guass-Newton problem with the user-provided least-squares
271             residual and Jacobian. The problem is regularized with an L2-norm proximal point
272             term.
273 
274   Options Database Keys:
275   + -tao_bqnk_max_cg_its - maximum number of bounded conjugate-gradient iterations taken in each Newton loop
276   . -tao_bqnk_init_type - trust radius initialization method ("constant", "direction", "interpolation")
277   . -tao_bqnk_update_type - trust radius update method ("step", "direction", "interpolation")
278   - -tao_bqnk_as_type - active-set estimation method ("none", "bertsekas")
279 
280   Level: beginner
281 M*/
282 PETSC_EXTERN PetscErrorCode TaoCreate_BRGN(Tao tao)
283 {
284   TAO_BRGN       *gn;
285   PetscErrorCode ierr;
286 
287   PetscFunctionBegin;
288   ierr = PetscNewLog(tao,&gn);CHKERRQ(ierr);
289 
290   tao->ops->destroy = TaoDestroy_BRGN;
291   tao->ops->setup = TaoSetUp_BRGN;
292   tao->ops->setfromoptions = TaoSetFromOptions_BRGN;
293   tao->ops->view = TaoView_BRGN;
294   tao->ops->solve = TaoSolve_BRGN;
295 
296   tao->data = (void*)gn;
297   gn->lambda = 1e-4;
298   gn->epsilon = 1e-6;
299   gn->parent = tao;
300 
301   ierr = MatCreate(PetscObjectComm((PetscObject)tao), &gn->H);CHKERRQ(ierr);
302   ierr = MatSetOptionsPrefix(gn->H, "tao_brgn_hessian_");CHKERRQ(ierr);
303 
304   ierr = TaoCreate(PetscObjectComm((PetscObject)tao), &gn->subsolver);CHKERRQ(ierr);
305   ierr = TaoSetType(gn->subsolver, TAOBNLS);CHKERRQ(ierr);
306   ierr = TaoSetOptionsPrefix(gn->subsolver, "tao_brgn_subsolver_");CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 /*@C
311   TaoBRGNGetSubsolver - Get the pointer to the subsolver inside BRGN
312 
313   Collective on Tao
314 
315   Level: developer
316 
317   Input Parameters:
318 +  tao - the Tao solver context
319 -  subsolver - the Tao sub-solver context
320 @*/
321 PetscErrorCode TaoBRGNGetSubsolver(Tao tao,Tao *subsolver)
322 {
323   TAO_BRGN       *gn = (TAO_BRGN *)tao->data;
324 
325   PetscFunctionBegin;
326   *subsolver = gn->subsolver;
327   PetscFunctionReturn(0);
328 }
329 
330 /*@C
331   TaoBRGNSetL1RegularizerWeight - Set the L1-norm regularizer weight for the Gauss-Newton least-squares algorithm
332 
333   Collective on Tao
334 
335   Level: developer
336 
337   Input Parameters:
338 +  tao - the Tao solver context
339 -  lambda - L1-norm regularizer weight
340 @*/
341 PetscErrorCode TaoBRGNSetL1RegularizerWeight(Tao tao,PetscReal lambda)
342 {
343   TAO_BRGN       *gn = (TAO_BRGN *)tao->data;
344 
345   /* Initialize lambda here */
346 
347   PetscFunctionBegin;
348   gn->lambda = lambda;
349   PetscFunctionReturn(0);
350 }
351 
352 /*@C
353   TaoBRGNSetL1SmoothEpsilon - Set the L1-norm smooth approximation parameter for L1-regularized least-squares algorithm
354 
355   Collective on Tao
356 
357   Level: developer
358 
359   Input Parameters:
360 +  tao - the Tao solver context
361 -  epsilon - L1-norm smooth approximation parameter
362 @*/
363 PetscErrorCode TaoBRGNSetL1SmoothEpsilon(Tao tao, PetscReal epsilon)
364 {
365   TAO_BRGN       *gn = (TAO_BRGN *)tao->data;
366 
367   /* Initialize epsilon here */
368 
369   PetscFunctionBegin;
370   gn->epsilon = epsilon;
371   PetscFunctionReturn(0);
372 }
373 
374 /*@C
375    TaoBRGNSetDictionaryMatrix - bind the dictionary matrix from user application context to gn->D, for compressed sensing (with least-squares problem)
376 
377    Input Parameters:
378 +  tao  - the Tao context
379 .  dict - the user specified dictionary matrix
380 
381     Level: developer
382 @*/
383 PetscErrorCode TaoBRGNSetDictionaryMatrix(Tao tao, Mat dict)
384 {
385   TAO_BRGN       *gn = (TAO_BRGN *)tao->data;
386   PetscErrorCode ierr;
387   PetscFunctionBegin;
388   PetscValidHeaderSpecific(tao,TAO_CLASSID,1);
389   if (dict) {
390     PetscValidHeaderSpecific(dict,MAT_CLASSID,2);
391     /*PetscCheckSameComm(tao,1,dict,2);*/
392     ierr = PetscObjectReference((PetscObject)dict);CHKERRQ(ierr);
393   }
394   ierr = MatDestroy(&gn->D);CHKERRQ(ierr);
395   gn->D = dict;  /* We allow to set a null dictionary, which means we just use default identity matrix? */
396   PetscFunctionReturn(0);
397 }
398 
399 /* XH:
400 Changed TaoBRGNSetTikhonovLambda --> TaoBRGNSetL1RegularizerWeight  in brgn.c, peststao.h, and zbrgnf.c.
401 Added TaoBRGNSetL1SmoothEpsilon by following TaoBRGNSetL1RegularizerWeight.
402 Added TaoBRGNSetDictionaryMatrix by following TaoBRGNSetL1RegularizerWeight
403  Maybe change D*x to D(x), and  A*x to A(x) as function handle
404  Maybe need to also keep y = D*x, to avoid duplicate frequent computation of D*x
405  */