xref: /petsc/src/tao/leastsquares/impls/brgn/brgn.c (revision 0d71dc2b6864a86bf8cca4de849a85878788a65c)
1 #include <../src/tao/leastsquares/impls/brgn/brgn.h>
2 
3 /* Old code
4 static PetscErrorCode GNHessianProd(Mat H, Vec in, Vec out)
5 {
6   TAO_BRGN              *gn;
7   PetscErrorCode        ierr;
8 
9   PetscFunctionBegin;
10   ierr = MatShellGetContext(H, &gn);CHKERRQ(ierr);
11   ierr = MatMult(gn->subsolver->ls_jac, in, gn->r_work);CHKERRQ(ierr);
12   ierr = MatMultTranspose(gn->subsolver->ls_jac, gn->r_work, out);CHKERRQ(ierr);
13   ierr = VecAXPY(out, gn->lambda, in);CHKERRQ(ierr);
14   PetscFunctionReturn(0);
15 }
16 
17 static PetscErrorCode GNObjectiveGradientEval(Tao tao, Vec X, PetscReal *fcn, Vec G, void *ptr)
18 {
19   TAO_BRGN              *gn = (TAO_BRGN *)ptr;
20   PetscScalar           xnorm2;
21   PetscErrorCode        ierr;
22 
23   PetscFunctionBegin;
24   ierr = TaoComputeResidual(tao, X, tao->ls_res);CHKERRQ(ierr);
25   ierr = VecDotBegin(tao->ls_res, tao->ls_res, fcn);CHKERRQ(ierr);
26   ierr = VecAXPBYPCZ(gn->x_work, 1.0, -1.0, 0.0, X, gn->x_old);CHKERRQ(ierr);
27   ierr = VecDotBegin(gn->x_work, gn->x_work, &xnorm2);CHKERRQ(ierr);
28   ierr = VecDotEnd(tao->ls_res, tao->ls_res, fcn);CHKERRQ(ierr);
29   ierr = VecDotEnd(gn->x_work, gn->x_work, &xnorm2);CHKERRQ(ierr);
30   *fcn = 0.5*(*fcn) + 0.5*gn->lambda*xnorm2;
31 
32   ierr = TaoComputeResidualJacobian(tao, X, tao->ls_jac, tao->ls_jac_pre);CHKERRQ(ierr);
33   ierr = MatMultTranspose(tao->ls_jac, tao->ls_res, G);CHKERRQ(ierr);
34   ierr = VecAXPBYPCZ(G, gn->lambda, -gn->lambda, 1.0, X, gn->x_old);CHKERRQ(ierr);
35   PetscFunctionReturn(0);
36 }
37 */
38 
39 static PetscErrorCode GNHessianProd(Mat H, Vec in, Vec out)
40 {
41   TAO_BRGN              *gn;
42   PetscInt              N = 3; /* dimension of X hack, to change size 3 to VecGetSize() from Vec X*/
43   PetscScalar           epsilon = 1e-6; /*XH: added epsilon to approximate L1-norm with sqrt(x^2+epsilon^2)-epsilon*/
44   PetscErrorCode        ierr;
45   Vec                   xE, tmp;          /* xE: vector sqrt(x.^2 + epsilon^2) that is reused multiple times */
46 
47   PetscFunctionBegin;
48   /* Allocate vectors */
49 
50   /* XH: Use x_work or r_work instead and do not create new vectors */
51   ierr = VecCreateSeq(MPI_COMM_SELF,N,&xE);CHKERRQ(ierr);
52   ierr = VecCreateSeq(MPI_COMM_SELF,N,&tmp);CHKERRQ(ierr);
53 
54   ierr = MatShellGetContext(H, &gn);CHKERRQ(ierr);
55   ierr = MatMult(gn->subsolver->ls_jac, in, gn->r_work);CHKERRQ(ierr);
56   ierr = MatMultTranspose(gn->subsolver->ls_jac, gn->r_work, out);CHKERRQ(ierr);
57 
58   /* XH: Use the sparse diagonal vector diag that has already been computed.  Code should just be the VecAXPY() */
59 
60   /* out = out +  lambda*epsilon^2*(in./xE.^3) */
61   /* xE = sqrt(x.^2+epsilon^2). Should we reuse code/result of xE from GNObjectiveGradientEval()?*/
62   ierr = VecPointwiseMult(xE, gn->x_old, gn->x_old);CHKERRQ(ierr);  /* hack, todo: change to X, how to add X? */
63   ierr = VecShift(xE, epsilon*epsilon);CHKERRQ(ierr);
64   ierr = VecCopy(xE, tmp);CHKERRQ(ierr);                 /* tmp = xE.^2 */
65   ierr = VecSqrtAbs(xE);CHKERRQ(ierr);
66   ierr = VecPointwiseMult(tmp, tmp, xE);CHKERRQ(ierr);   /* tmp = xE.^3 */
67   ierr = VecPointwiseDivide(tmp, in, tmp);CHKERRQ(ierr); /* tmp = in./xE.^3 */
68   ierr = VecAXPY(out, gn->lambda * epsilon * epsilon, tmp);CHKERRQ(ierr);
69 
70   /* Free PETSc data structures */
71   ierr = VecDestroy(&xE);CHKERRQ(ierr);
72   ierr = VecDestroy(&tmp);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 
76 static PetscErrorCode GNObjectiveGradientEval(Tao tao, Vec X, PetscReal *fcn, Vec G, void *ptr)
77 {
78   TAO_BRGN              *gn = (TAO_BRGN *)ptr;
79   PetscInt              N = 3; /* dimension of X hack, to change size 3 to VecGetSize() from Vec X*/
80   PetscScalar           xESum, epsilon = 1e-6;/* epsilon to approximate L1-norm with sqrt(x^2+epsilon^2)-epsilon*/
81   PetscErrorCode        ierr;
82   Vec                   xE;          /* xE: vector sqrt(x.^2 + epsilon^2) that is reused multiple times */
83 
84   PetscFunctionBegin;
85   /* Allocate vectors */
86 
87   /* XH: Do no create vectors; use x_work or r_work */
88   ierr = VecCreateSeq(MPI_COMM_SELF,N,&xE);CHKERRQ(ierr);
89 
90   ierr = TaoComputeResidual(tao, X, tao->ls_res);CHKERRQ(ierr);
91   ierr = VecDotBegin(tao->ls_res, tao->ls_res, fcn);CHKERRQ(ierr);
92   ierr = VecDotEnd(tao->ls_res, tao->ls_res, fcn);CHKERRQ(ierr);
93 
94   /* Compute xE = sqrt(x.^2+epsilon^2) */
95 
96   /* XH: Use epsilon from the gn structure */
97   /* XH: Use VecGetSize() rather than N */
98 
99   ierr = VecPointwiseMult(xE, X, X);CHKERRQ(ierr);
100   ierr = VecShift(xE, epsilon*epsilon);CHKERRQ(ierr);
101   ierr = VecSqrtAbs(xE);CHKERRQ(ierr);
102 
103   ierr = VecSum(xE,&xESum);CHKERRQ(ierr);CHKERRQ(ierr);
104   *fcn = 0.5*(*fcn) + gn->lambda*(xESum - N*epsilon);
105 
106   ierr = TaoComputeResidualJacobian(tao, X, tao->ls_jac, tao->ls_jac_pre);CHKERRQ(ierr);
107   ierr = MatMultTranspose(tao->ls_jac, tao->ls_res, G);CHKERRQ(ierr);
108   /* G = G + lambda*(X./xE) */
109   ierr = VecPointwiseDivide(xE, X, xE);CHKERRQ(ierr); /* reuse xE = X./xE */
110   ierr = VecAXPY(G, gn->lambda, xE);CHKERRQ(ierr);
111 
112   /* Free PETSc data structures */
113   ierr = VecDestroy(&xE);CHKERRQ(ierr);
114 
115   PetscFunctionReturn(0);
116 }
117 
118 
119 static PetscErrorCode GNComputeHessian(Tao tao, Vec X, Mat H, Mat Hpre, void *ptr)
120 {
121   PetscErrorCode ierr;
122 
123   PetscFunctionBegin;
124   ierr = TaoComputeResidualJacobian(tao, X, tao->ls_jac, tao->ls_jac_pre);CHKERRQ(ierr);
125 
126   /* XH: Calculate and store diagonal matrix as a vector; diag in the structure */
127   PetscFunctionReturn(0);
128 }
129 
130 static PetscErrorCode GNHookFunction(Tao tao, PetscInt iter)
131 {
132   TAO_BRGN              *gn = (TAO_BRGN *)tao->user_update;
133   PetscErrorCode        ierr;
134 
135   PetscFunctionBegin;
136   /* Update basic tao information from the subsolver */
137   gn->parent->nfuncs = tao->nfuncs;
138   gn->parent->ngrads = tao->ngrads;
139   gn->parent->nfuncgrads = tao->nfuncgrads;
140   gn->parent->nhess = tao->nhess;
141   gn->parent->niter = tao->niter;
142   gn->parent->ksp_its = tao->ksp_its;
143   gn->parent->ksp_tot_its = tao->ksp_tot_its;
144   ierr = TaoGetConvergedReason(tao, &gn->parent->reason);CHKERRQ(ierr);
145   /* Update the solution vectors */
146   if (iter == 0) {
147     ierr = VecSet(gn->x_old, 0.0);CHKERRQ(ierr);
148   } else {
149     ierr = VecCopy(tao->solution, gn->x_old);CHKERRQ(ierr);
150     ierr = VecCopy(tao->solution, gn->parent->solution);CHKERRQ(ierr);
151   }
152   /* Update the gradient */
153   ierr = VecCopy(tao->gradient, gn->parent->gradient);CHKERRQ(ierr);
154   /* Call general purpose update function */
155   if (gn->parent->ops->update) {
156     ierr = (*gn->parent->ops->update)(gn->parent, gn->parent->niter);CHKERRQ(ierr);
157   }
158   PetscFunctionReturn(0);
159 }
160 
161 static PetscErrorCode TaoSolve_BRGN(Tao tao)
162 {
163   TAO_BRGN              *gn = (TAO_BRGN *)tao->data;
164   PetscErrorCode        ierr;
165 
166   PetscFunctionBegin;
167   ierr = TaoSolve(gn->subsolver);CHKERRQ(ierr);
168   /* Update basic tao information from the subsolver */
169   tao->nfuncs = gn->subsolver->nfuncs;
170   tao->ngrads = gn->subsolver->ngrads;
171   tao->nfuncgrads = gn->subsolver->nfuncgrads;
172   tao->nhess = gn->subsolver->nhess;
173   tao->niter = gn->subsolver->niter;
174   tao->ksp_its = gn->subsolver->ksp_its;
175   tao->ksp_tot_its = gn->subsolver->ksp_tot_its;
176   ierr = TaoGetConvergedReason(gn->subsolver, &tao->reason);CHKERRQ(ierr);
177   /* Update vectors */
178   ierr = VecCopy(gn->subsolver->solution, tao->solution);CHKERRQ(ierr);
179   ierr = VecCopy(gn->subsolver->gradient, tao->gradient);CHKERRQ(ierr);
180   PetscFunctionReturn(0);
181 }
182 
183 static PetscErrorCode TaoSetFromOptions_BRGN(PetscOptionItems *PetscOptionsObject,Tao tao)
184 {
185   TAO_BRGN              *gn = (TAO_BRGN *)tao->data;
186   PetscErrorCode        ierr;
187 
188   /* XH: Read an option to change epsilon in the structure; follow the lambda function below */
189 
190   PetscFunctionBegin;
191   ierr = PetscOptionsHead(PetscOptionsObject,"Gauss-Newton method for least-squares problems using Tikhonov regularization");CHKERRQ(ierr);
192   ierr = PetscOptionsReal("-tao_brgn_lambda", "Tikhonov regularization factor", "", gn->lambda, &gn->lambda, NULL);CHKERRQ(ierr);
193   ierr = PetscOptionsTail();CHKERRQ(ierr);
194   ierr = TaoSetFromOptions(gn->subsolver);CHKERRQ(ierr);
195   PetscFunctionReturn(0);
196 }
197 
198 static PetscErrorCode TaoView_BRGN(Tao tao, PetscViewer viewer)
199 {
200   TAO_BRGN              *gn = (TAO_BRGN *)tao->data;
201   PetscErrorCode        ierr;
202 
203   PetscFunctionBegin;
204   ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
205   ierr = TaoView(gn->subsolver, viewer);CHKERRQ(ierr);
206   ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
207   PetscFunctionReturn(0);
208 }
209 
210 static PetscErrorCode TaoSetUp_BRGN(Tao tao)
211 {
212   TAO_BRGN              *gn = (TAO_BRGN *)tao->data;
213   PetscErrorCode        ierr;
214   PetscBool             is_bnls, is_bntr, is_bntl;
215   PetscInt              i, nx, Nx;
216 
217   PetscFunctionBegin;
218   if (!tao->ls_res) SETERRQ(PetscObjectComm((PetscObject)tao), PETSC_ERR_ORDER, "TaoSetResidualRoutine() must be called before setup!");
219   ierr = PetscObjectTypeCompare((PetscObject)gn->subsolver, TAOBNLS, &is_bnls);CHKERRQ(ierr);
220   ierr = PetscObjectTypeCompare((PetscObject)gn->subsolver, TAOBNTR, &is_bntr);CHKERRQ(ierr);
221   ierr = PetscObjectTypeCompare((PetscObject)gn->subsolver, TAOBNTL, &is_bntl);CHKERRQ(ierr);
222   if ((is_bnls || is_bntr || is_bntl) && !tao->ls_jac) SETERRQ(PetscObjectComm((PetscObject)tao), PETSC_ERR_ORDER, "TaoSetResidualJacobianRoutine() must be called before setup!");
223   if (!tao->gradient){
224     ierr = VecDuplicate(tao->solution, &tao->gradient);CHKERRQ(ierr);
225   }
226   if (!gn->x_work){
227     ierr = VecDuplicate(tao->solution, &gn->x_work);CHKERRQ(ierr);
228   }
229   if (!gn->r_work){
230     ierr = VecDuplicate(tao->ls_res, &gn->r_work);CHKERRQ(ierr);
231   }
232   if (!gn->x_old) {
233     ierr = VecDuplicate(tao->solution, &gn->x_old);CHKERRQ(ierr);
234     ierr = VecSet(gn->x_old, 0.0);CHKERRQ(ierr);
235   }
236 
237   /* XH: Create diag vector */
238 
239   if (!tao->setupcalled) {
240     /* Hessian setup */
241     ierr = VecGetLocalSize(tao->solution, &nx);CHKERRQ(ierr);
242     ierr = VecGetSize(tao->solution, &Nx);CHKERRQ(ierr);
243     ierr = MatSetSizes(gn->H, nx, nx, Nx, Nx);CHKERRQ(ierr);
244     ierr = MatSetType(gn->H, MATSHELL);CHKERRQ(ierr);
245     ierr = MatSetUp(gn->H);CHKERRQ(ierr);
246     ierr = MatShellSetOperation(gn->H, MATOP_MULT, (void (*)(void))GNHessianProd);CHKERRQ(ierr);
247     ierr = MatShellSetContext(gn->H, (void*)gn);CHKERRQ(ierr);
248     /* Subsolver setup */
249     ierr = TaoSetUpdate(gn->subsolver, GNHookFunction, (void*)gn);CHKERRQ(ierr);
250     ierr = TaoSetInitialVector(gn->subsolver, tao->solution);CHKERRQ(ierr);
251     if (tao->bounded) {
252       ierr = TaoSetVariableBounds(gn->subsolver, tao->XL, tao->XU);CHKERRQ(ierr);
253     }
254     ierr = TaoSetResidualRoutine(gn->subsolver, tao->ls_res, tao->ops->computeresidual, tao->user_lsresP);CHKERRQ(ierr);
255     ierr = TaoSetJacobianResidualRoutine(gn->subsolver, tao->ls_jac, tao->ls_jac, tao->ops->computeresidualjacobian, tao->user_lsjacP);CHKERRQ(ierr);
256     ierr = TaoSetObjectiveAndGradientRoutine(gn->subsolver, GNObjectiveGradientEval, (void*)gn);CHKERRQ(ierr);
257     ierr = TaoSetHessianRoutine(gn->subsolver, gn->H, gn->H, GNComputeHessian, (void*)gn);CHKERRQ(ierr);
258     /* Propagate some options down */
259     ierr = TaoSetTolerances(gn->subsolver, tao->gatol, tao->grtol, tao->gttol);CHKERRQ(ierr);
260     ierr = TaoSetMaximumIterations(gn->subsolver, tao->max_it);CHKERRQ(ierr);
261     ierr = TaoSetMaximumFunctionEvaluations(gn->subsolver, tao->max_funcs);CHKERRQ(ierr);
262     for (i=0; i<tao->numbermonitors; ++i) {
263       ierr = TaoSetMonitor(gn->subsolver, tao->monitor[i], tao->monitorcontext[i], tao->monitordestroy[i]);CHKERRQ(ierr);
264       ierr = PetscObjectReference((PetscObject)(tao->monitorcontext[i]));CHKERRQ(ierr);
265     }
266     ierr = TaoSetUp(gn->subsolver);CHKERRQ(ierr);
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 static PetscErrorCode TaoDestroy_BRGN(Tao tao)
272 {
273   TAO_BRGN              *gn = (TAO_BRGN *)tao->data;
274   PetscErrorCode        ierr;
275 
276   PetscFunctionBegin;
277   if (tao->setupcalled) {
278     ierr = VecDestroy(&tao->gradient);CHKERRQ(ierr);
279     ierr = VecDestroy(&gn->x_work);CHKERRQ(ierr);
280     ierr = VecDestroy(&gn->r_work);CHKERRQ(ierr);
281     ierr = VecDestroy(&gn->x_old);CHKERRQ(ierr);
282 
283     /* XH: Destroy diagonal vector */
284   }
285   ierr = MatDestroy(&gn->H);CHKERRQ(ierr);
286   ierr = TaoDestroy(&gn->subsolver);CHKERRQ(ierr);
287   gn->parent = NULL;
288   ierr = PetscFree(tao->data);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 /*MC
293   TAOBRGN - Bounded Regularized Gauss-Newton method for solving nonlinear least-squares
294             problems with bound constraints. This algorithm is a thin wrapper around TAOBNTL
295             that constructs the Guass-Newton problem with the user-provided least-squares
296             residual and Jacobian. The problem is regularized with an L2-norm proximal point
297             term.
298 
299   Options Database Keys:
300   + -tao_bqnk_max_cg_its - maximum number of bounded conjugate-gradient iterations taken in each Newton loop
301   . -tao_bqnk_init_type - trust radius initialization method ("constant", "direction", "interpolation")
302   . -tao_bqnk_update_type - trust radius update method ("step", "direction", "interpolation")
303   - -tao_bqnk_as_type - active-set estimation method ("none", "bertsekas")
304 
305   Level: beginner
306 M*/
307 PETSC_EXTERN PetscErrorCode TaoCreate_BRGN(Tao tao)
308 {
309   TAO_BRGN       *gn;
310   PetscErrorCode ierr;
311 
312   PetscFunctionBegin;
313   ierr = PetscNewLog(tao,&gn);CHKERRQ(ierr);
314 
315   tao->ops->destroy = TaoDestroy_BRGN;
316   tao->ops->setup = TaoSetUp_BRGN;
317   tao->ops->setfromoptions = TaoSetFromOptions_BRGN;
318   tao->ops->view = TaoView_BRGN;
319   tao->ops->solve = TaoSolve_BRGN;
320 
321   /* XH: initialize the epsilon */
322 
323   tao->data = (void*)gn;
324   gn->lambda = 1e-4;
325   gn->parent = tao;
326 
327   ierr = MatCreate(PetscObjectComm((PetscObject)tao), &gn->H);CHKERRQ(ierr);
328   ierr = MatSetOptionsPrefix(gn->H, "tao_brgn_hessian_");CHKERRQ(ierr);
329 
330   ierr = TaoCreate(PetscObjectComm((PetscObject)tao), &gn->subsolver);CHKERRQ(ierr);
331   ierr = TaoSetType(gn->subsolver, TAOBNLS);CHKERRQ(ierr);
332   ierr = TaoSetOptionsPrefix(gn->subsolver, "tao_brgn_subsolver_");CHKERRQ(ierr);
333   PetscFunctionReturn(0);
334 }
335 
336 /*@C
337   TaoBRGNGetSubsolver - Get the pointer to the subsolver inside BRGN
338 
339   Collective on Tao
340 
341   Level: developer
342 
343   Input Parameters:
344 +  tao - the Tao solver context
345 -  subsolver - the Tao sub-solver context
346 @*/
347 PetscErrorCode TaoBRGNGetSubsolver(Tao tao, Tao *subsolver)
348 {
349   TAO_BRGN       *gn = (TAO_BRGN *)tao->data;
350 
351   PetscFunctionBegin;
352   *subsolver = gn->subsolver;
353   PetscFunctionReturn(0);
354 }
355 
356 /*@C
357   TaoBRGNSetTikhonovLambda - Set the Tikhonov regularization factor for the Gauss-Newton least-squares algorithm
358 
359   Collective on Tao
360 
361   Level: developer
362 
363   Input Parameters:
364 +  tao - the Tao solver context
365 -  lambda - Tikhonov regularization factor
366 @*/
367 PetscErrorCode TaoBRGNSetTikhonovLambda(Tao tao, PetscReal lambda)
368 {
369   TAO_BRGN       *gn = (TAO_BRGN *)tao->data;
370 
371   /* Initialize epsilon here */
372 
373   PetscFunctionBegin;
374   gn->lambda = lambda;
375   PetscFunctionReturn(0);
376 }
377 
378 /* XH: Add a routine to TaoBRGNSetEpsilon; follow the SetTikhonovLambda function including the comment */
379 /* XH: Look for BRGNSetTikhonovLambda in the rest of the code; it will appear in a header file somewhere and add TaoBRGNSetEpsilon to that header with the same format */
380 /* XH: Need to add a line to the ftn-custom for the TaoBRGNSetEpsilon function */
381