xref: /petsc/src/tao/leastsquares/impls/pounders/pounders.c (revision 7efe37a1cedd385a2f501b843d47cdf14dfb49ea)
1 #include <../src/tao/leastsquares/impls/pounders/pounders.h>
2 
3 static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, void *ctx)
4 {
5   PetscFunctionBegin;
6   PetscFunctionReturn(0);
7 }
8 
9 static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, void *ctx)
10 {
11   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx;
12   PetscReal     d1, d2;
13 
14   PetscFunctionBegin;
15   /* g = A*x  (add b later)*/
16   PetscCall(MatMult(mfqP->subH, x, g));
17 
18   /* f = 1/2 * x'*(Ax) + b'*x  */
19   PetscCall(VecDot(x, g, &d1));
20   PetscCall(VecDot(mfqP->subb, x, &d2));
21   *f = 0.5 * d1 + d2;
22 
23   /* now  g = g + b */
24   PetscCall(VecAXPY(g, 1.0, mfqP->subb));
25   PetscFunctionReturn(0);
26 }
27 
28 static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum)
29 {
30   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
31   PetscInt      i, row, col;
32   PetscReal     fr, fc;
33 
34   PetscFunctionBegin;
35   PetscCall(TaoComputeResidual(tao, x, F));
36   if (tao->res_weights_v) {
37     PetscCall(VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F));
38     PetscCall(VecDot(mfqP->workfvec, mfqP->workfvec, fsum));
39   } else if (tao->res_weights_w) {
40     *fsum = 0;
41     for (i = 0; i < tao->res_weights_n; i++) {
42       row = tao->res_weights_rows[i];
43       col = tao->res_weights_cols[i];
44       PetscCall(VecGetValues(F, 1, &row, &fr));
45       PetscCall(VecGetValues(F, 1, &col, &fc));
46       *fsum += tao->res_weights_w[i] * fc * fr;
47     }
48   } else {
49     PetscCall(VecDot(F, F, fsum));
50   }
51   PetscCall(PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum));
52   PetscCheck(!PetscIsInfOrNanReal(*fsum), PETSC_COMM_SELF, PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
53   PetscFunctionReturn(0);
54 }
55 
56 static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin)
57 {
58 #if defined(PETSC_USE_REAL_SINGLE)
59   PetscReal atol = 1.0e-5;
60 #else
61   PetscReal atol = 1.0e-10;
62 #endif
63   PetscInt      info, its;
64   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
65 
66   PetscFunctionBegin;
67   if (!mfqP->usegqt) {
68     PetscReal maxval;
69     PetscInt  i, j;
70 
71     PetscCall(VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
72     PetscCall(VecAssemblyBegin(mfqP->subb));
73     PetscCall(VecAssemblyEnd(mfqP->subb));
74 
75     PetscCall(VecSet(mfqP->subx, 0.0));
76 
77     PetscCall(VecSet(mfqP->subndel, -1.0));
78     PetscCall(VecSet(mfqP->subpdel, +1.0));
79 
80     /* Complete the lower triangle of the Hessian matrix */
81     for (i = 0; i < mfqP->n; i++) {
82       for (j = i + 1; j < mfqP->n; j++) mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i];
83     }
84     PetscCall(MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES));
85     PetscCall(MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY));
86     PetscCall(MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY));
87 
88     PetscCall(TaoResetStatistics(mfqP->subtao));
89     /* PetscCall(TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_DEFAULT)); */
90     /* enforce bound constraints -- experimental */
91     if (tao->XU && tao->XL) {
92       PetscCall(VecCopy(tao->XU, mfqP->subxu));
93       PetscCall(VecAXPY(mfqP->subxu, -1.0, tao->solution));
94       PetscCall(VecScale(mfqP->subxu, 1.0 / mfqP->delta));
95       PetscCall(VecCopy(tao->XL, mfqP->subxl));
96       PetscCall(VecAXPY(mfqP->subxl, -1.0, tao->solution));
97       PetscCall(VecScale(mfqP->subxl, 1.0 / mfqP->delta));
98 
99       PetscCall(VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel));
100       PetscCall(VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel));
101     } else {
102       PetscCall(VecCopy(mfqP->subpdel, mfqP->subxu));
103       PetscCall(VecCopy(mfqP->subndel, mfqP->subxl));
104     }
105     /* Make sure xu > xl */
106     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
107     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
108     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
109     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "upper bound < lower bound in subproblem");
110     /* Make sure xu > tao->solution > xl */
111     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
112     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx));
113     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
114     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess < lower bound in subproblem");
115 
116     PetscCall(VecCopy(mfqP->subx, mfqP->subpdel));
117     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
118     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
119     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess > upper bound in subproblem");
120 
121     PetscCall(TaoSolve(mfqP->subtao));
122     PetscCall(TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL));
123 
124     /* test bounds post-solution*/
125     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
126     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx));
127     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
128     if (maxval > 1e-5) {
129       PetscCall(PetscInfo(tao, "subproblem solution < lower bound\n"));
130       tao->reason = TAO_DIVERGED_TR_REDUCTION;
131     }
132 
133     PetscCall(VecCopy(mfqP->subx, mfqP->subpdel));
134     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
135     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
136     if (maxval > 1e-5) {
137       PetscCall(PetscInfo(tao, "subproblem solution > upper bound\n"));
138       tao->reason = TAO_DIVERGED_TR_REDUCTION;
139     }
140   } else {
141     gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3);
142   }
143   *qmin *= -1;
144   PetscFunctionReturn(0);
145 }
146 
147 static PetscErrorCode pounders_update_res(Tao tao)
148 {
149   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
150   PetscInt      i, row, col;
151   PetscBLASInt  blasn = mfqP->n, blasn2 = blasn * blasn, blasm = mfqP->m, ione = 1;
152   PetscReal     zero = 0.0, one = 1.0, wii, factor;
153 
154   PetscFunctionBegin;
155   for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] = 0;
156   for (i = 0; i < mfqP->n * mfqP->n; i++) mfqP->Hres[i] = 0;
157 
158   /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */
159   if (tao->res_weights_v) {
160     /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */
161     for (i = 0; i < mfqP->m; i++) {
162       PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &factor));
163       factor = factor * mfqP->C[i];
164       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione));
165     }
166 
167     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
168     /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/
169     for (i = 0; i < mfqP->m; i++) {
170       PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &wii));
171       if (tao->niter > 1) {
172         factor = wii * mfqP->C[i];
173         /* add wii * ci * Hi */
174         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
175       }
176       /* add wii * gi * gi' */
177       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn));
178     }
179   } else if (tao->res_weights_w) {
180     /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */
181     for (i = 0; i < tao->res_weights_n; i++) {
182       row = tao->res_weights_rows[i];
183       col = tao->res_weights_cols[i];
184 
185       factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
186       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione));
187       factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
188       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione));
189     }
190 
191     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
192     /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
193     for (i = 0; i < tao->res_weights_n; i++) {
194       row    = tao->res_weights_rows[i];
195       col    = tao->res_weights_cols[i];
196       factor = tao->res_weights_w[i] / 2.0;
197       /* add wij * gi gj' + wij * gj gi' */
198       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn));
199       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn));
200     }
201     if (tao->niter > 1) {
202       for (i = 0; i < tao->res_weights_n; i++) {
203         row = tao->res_weights_rows[i];
204         col = tao->res_weights_cols[i];
205 
206         /* add  wij*cj*Hi */
207         factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
208         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione));
209 
210         /* add wij*ci*Hj */
211         factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
212         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione));
213       }
214     }
215   } else {
216     /* Default: Gres= sum_i[cigi] = G*c' */
217     PetscCall(PetscInfo(tao, "Identity weights\n"));
218     PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione));
219 
220     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
221     /*  Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)}  */
222     PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn));
223 
224     /* sum(F(xkin,i)*H(:,:,i)) */
225     if (tao->niter > 1) {
226       for (i = 0; i < mfqP->m; i++) {
227         factor = mfqP->C[i];
228         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
229       }
230     }
231   }
232   PetscFunctionReturn(0);
233 }
234 
235 static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi)
236 {
237   /* Phi = .5*[x(1)^2  sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */
238   PetscInt  i, j, k;
239   PetscReal sqrt2 = PetscSqrtReal(2.0);
240 
241   PetscFunctionBegin;
242   j = 0;
243   for (i = 0; i < n; i++) {
244     phi[j] = 0.5 * x[i] * x[i];
245     j++;
246     for (k = i + 1; k < n; k++) {
247       phi[j] = x[i] * x[k] / sqrt2;
248       j++;
249     }
250   }
251   PetscFunctionReturn(0);
252 }
253 
254 static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP)
255 {
256   /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x'
257    that satisfies the interpolation conditions Q(X[:,j]) = f(j)
258    for j=1,...,m and with a Hessian matrix of least Frobenius norm */
259 
260   /* NB --we are ignoring c */
261   PetscInt     i, j, k, num, np = mfqP->nmodelpoints;
262   PetscReal    one = 1.0, zero = 0.0, negone = -1.0;
263   PetscBLASInt blasnpmax  = mfqP->npmax;
264   PetscBLASInt blasnplus1 = mfqP->n + 1;
265   PetscBLASInt blasnp     = np;
266   PetscBLASInt blasint    = mfqP->n * (mfqP->n + 1) / 2;
267   PetscBLASInt blasint2   = np - mfqP->n - 1;
268   PetscBLASInt info, ione = 1;
269   PetscReal    sqrt2 = PetscSqrtReal(2.0);
270 
271   PetscFunctionBegin;
272   for (i = 0; i < mfqP->n * mfqP->m; i++) mfqP->Gdel[i] = 0;
273   for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) mfqP->Hdel[i] = 0;
274 
275   /* factor M */
276   PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info));
277   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrf returned with value %" PetscBLASInt_FMT, info);
278 
279   if (np == mfqP->n + 1) {
280     for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) mfqP->omega[i] = 0.0;
281     for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->beta[i] = 0.0;
282   } else {
283     /* Let Ltmp = (L'*L) */
284     PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint));
285 
286     /* factor Ltmp */
287     PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info));
288     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrf returned with value %" PetscBLASInt_FMT, info);
289   }
290 
291   for (k = 0; k < mfqP->m; k++) {
292     if (np != mfqP->n + 1) {
293       /* Solve L'*L*Omega = Z' * RESk*/
294       PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione));
295       PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info));
296       PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrs returned with value %" PetscBLASInt_FMT, info);
297 
298       /* Beta = L*Omega */
299       PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione));
300     }
301 
302     /* solve M'*Alpha = RESk - N'*Beta */
303     PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione));
304     PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info));
305     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrs returned with value %" PetscBLASInt_FMT, info);
306 
307     /* Gdel(:,k) = Alpha(2:n+1) */
308     for (i = 0; i < mfqP->n; i++) mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1];
309 
310     /* Set Hdels */
311     num = 0;
312     for (i = 0; i < mfqP->n; i++) {
313       /* H[i,i,k] = Beta(num) */
314       mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num];
315       num++;
316       for (j = i + 1; j < mfqP->n; j++) {
317         /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */
318         mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
319         mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
320         num++;
321       }
322     }
323   }
324   PetscFunctionReturn(0);
325 }
326 
327 static PetscErrorCode morepoints(TAO_POUNDERS *mfqP)
328 {
329   /* Assumes mfqP->model_indices[0]  is minimum index
330    Finishes adding points to mfqP->model_indices (up to npmax)
331    Computes L,Z,M,N
332    np is actual number of points in model (should equal npmax?) */
333   PetscInt         point, i, j, offset;
334   PetscInt         reject;
335   PetscBLASInt     blasn = mfqP->n, blasnpmax = mfqP->npmax, blasnplus1 = mfqP->n + 1, info, blasnmax = mfqP->nmax, blasint, blasint2, blasnp, blasmaxmn;
336   const PetscReal *x;
337   PetscReal        normd;
338 
339   PetscFunctionBegin;
340   /* Initialize M,N */
341   for (i = 0; i < mfqP->n + 1; i++) {
342     PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x));
343     mfqP->M[(mfqP->n + 1) * i] = 1.0;
344     for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
345     PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x));
346     PetscCall(phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i]));
347   }
348 
349   /* Now we add points until we have npmax starting with the most recent ones */
350   point              = mfqP->nHist - 1;
351   mfqP->nmodelpoints = mfqP->n + 1;
352   while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) {
353     /* Reject any points already in the model */
354     reject = 0;
355     for (j = 0; j < mfqP->n + 1; j++) {
356       if (point == mfqP->model_indices[j]) {
357         reject = 1;
358         break;
359       }
360     }
361 
362     /* Reject if norm(d) >c2 */
363     if (!reject) {
364       PetscCall(VecCopy(mfqP->Xhist[point], mfqP->workxvec));
365       PetscCall(VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex]));
366       PetscCall(VecNorm(mfqP->workxvec, NORM_2, &normd));
367       normd /= mfqP->delta;
368       if (normd > mfqP->c2) reject = 1;
369     }
370     if (reject) {
371       point--;
372       continue;
373     }
374 
375     PetscCall(VecGetArrayRead(mfqP->Xhist[point], &x));
376     mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0;
377     for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
378     PetscCall(VecRestoreArrayRead(mfqP->Xhist[point], &x));
379     PetscCall(phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)]));
380 
381     /* Update QR factorization */
382     /* Copy M' to Q_tmp */
383     for (i = 0; i < mfqP->n + 1; i++) {
384       for (j = 0; j < mfqP->npmax; j++) mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j];
385     }
386     blasnp = mfqP->nmodelpoints + 1;
387     /* Q_tmp,R = qr(M') */
388     blasmaxmn = PetscMax(mfqP->m, mfqP->n + 1);
389     PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info));
390     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine geqrf returned with value %" PetscBLASInt_FMT, info);
391 
392     /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */
393     /* L = N*Qtmp */
394     blasint2 = mfqP->n * (mfqP->n + 1) / 2;
395     /* Copy N to L_tmp */
396     for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) mfqP->L_tmp[i] = mfqP->N[i];
397     /* Copy L_save to L_tmp */
398 
399     /* L_tmp = N*Qtmp' */
400     PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info));
401     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info);
402 
403     /* Copy L_tmp to L_save */
404     for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L_save[i] = mfqP->L_tmp[i];
405 
406     /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */
407     blasint = mfqP->nmodelpoints - mfqP->n;
408     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
409     PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info));
410     PetscCall(PetscFPTrapPop());
411     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine gesvd returned with value %" PetscBLASInt_FMT, info);
412 
413     if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) {
414       /* accept point */
415       mfqP->model_indices[mfqP->nmodelpoints] = point;
416       /* Copy Q_tmp to Q */
417       for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q[i] = mfqP->Q_tmp[i];
418       for (i = 0; i < mfqP->npmax; i++) mfqP->tau[i] = mfqP->tau_tmp[i];
419       mfqP->nmodelpoints++;
420       blasnp = mfqP->nmodelpoints;
421 
422       /* Copy L_save to L */
423       for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = mfqP->L_save[i];
424     }
425     point--;
426   }
427 
428   blasnp = mfqP->nmodelpoints;
429   /* Copy Q(:,n+2:np) to Z */
430   /* First set Q_tmp to I */
431   for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q_tmp[i] = 0.0;
432   for (i = 0; i < mfqP->npmax; i++) mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0;
433 
434   /* Q_tmp = I * Q */
435   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));
436   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info);
437 
438   /* Copy Q_tmp(:,n+2:np) to Z) */
439   offset = mfqP->npmax * (mfqP->n + 1);
440   for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Z[i - offset] = mfqP->Q_tmp[i];
441 
442   if (mfqP->nmodelpoints == mfqP->n + 1) {
443     /* Set L to I_{n+1} */
444     for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = 0.0;
445     for (i = 0; i < mfqP->n; i++) mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0;
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */
451 static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index)
452 {
453   PetscFunctionBegin;
454   /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/
455   PetscCall(VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist]));
456   PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES));
457   PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]));
458   PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]));
459   PetscCall(VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex]));
460 
461   /* Project into feasible region */
462   if (tao->XU && tao->XL) PetscCall(VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist]));
463 
464   /* Compute value of new vector */
465   PetscCall(VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist]));
466   CHKMEMQ;
467   PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]));
468 
469   /* Add new vector to model */
470   mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist;
471   mfqP->nmodelpoints++;
472   mfqP->nHist++;
473   PetscFunctionReturn(0);
474 }
475 
476 static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints)
477 {
478   /* modeld = Q(:,np+1:n)' */
479   PetscInt     i, j, minindex = 0;
480   PetscReal    dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY;
481   PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blask, info;
482   PetscBLASInt blas1 = 1, blasnmax = mfqP->nmax;
483 
484   PetscFunctionBegin;
485   blask = mfqP->nmodelpoints;
486   /* Qtmp = I(n x n) */
487   for (i = 0; i < mfqP->n; i++) {
488     for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0;
489   }
490   for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0;
491 
492   /* Qtmp = Q * I */
493   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));
494 
495   for (i = mfqP->nmodelpoints; i < mfqP->n; i++) {
496     PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1));
497     if (dp > 0.0) { /* Model says use the other direction! */
498       for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i * mfqP->npmax + j] *= -1;
499     }
500     /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */
501     for (j = 0; j < mfqP->n; j++) mfqP->work2[j] = mfqP->Gres[j];
502     PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1));
503     PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1));
504     if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) {
505       minindex = i;
506       minvalue = mfqP->work[i];
507     }
508     if (addallpoints != 0) PetscCall(addpoint(tao, mfqP, i));
509   }
510   if (!addallpoints) PetscCall(addpoint(tao, mfqP, minindex));
511   PetscFunctionReturn(0);
512 }
513 
514 static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c)
515 {
516   PetscInt         i, j;
517   PetscBLASInt     blasm = mfqP->m, blasj, blask, blasn = mfqP->n, ione = 1, info;
518   PetscBLASInt     blasnpmax = mfqP->npmax, blasmaxmn;
519   PetscReal        proj, normd;
520   const PetscReal *x;
521 
522   PetscFunctionBegin;
523   for (i = mfqP->nHist - 1; i >= 0; i--) {
524     PetscCall(VecGetArrayRead(mfqP->Xhist[i], &x));
525     for (j = 0; j < mfqP->n; j++) mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta;
526     PetscCall(VecRestoreArrayRead(mfqP->Xhist[i], &x));
527     PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione));
528     PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione));
529     if (normd <= c) {
530       blasj = PetscMax((mfqP->n - mfqP->nmodelpoints), 0);
531       if (!mfqP->q_is_I) {
532         /* project D onto null */
533         blask = (mfqP->nmodelpoints);
534         PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info));
535         PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "ormqr returned value %" PetscBLASInt_FMT, info);
536       }
537       PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione));
538 
539       if (proj >= mfqP->theta1) { /* add this index to model */
540         mfqP->model_indices[mfqP->nmodelpoints] = i;
541         mfqP->nmodelpoints++;
542         PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione));
543         blask = mfqP->npmax * (mfqP->nmodelpoints);
544         PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione));
545         blask     = mfqP->nmodelpoints;
546         blasmaxmn = PetscMax(mfqP->m, mfqP->n);
547         PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info));
548         PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "geqrf returned value %" PetscBLASInt_FMT, info);
549         mfqP->q_is_I = 0;
550       }
551       if (mfqP->nmodelpoints == mfqP->n) break;
552     }
553   }
554 
555   PetscFunctionReturn(0);
556 }
557 
558 static PetscErrorCode TaoSolve_POUNDERS(Tao tao)
559 {
560   TAO_POUNDERS    *mfqP = (TAO_POUNDERS *)tao->data;
561   PetscInt         i, ii, j, k, l;
562   PetscReal        step = 1.0;
563   PetscInt         low, high;
564   PetscReal        minnorm;
565   PetscReal       *x, *f;
566   const PetscReal *xmint, *fmin;
567   PetscReal        deltaold;
568   PetscReal        gnorm;
569   PetscBLASInt     info, ione = 1, iblas;
570   PetscBool        valid, same;
571   PetscReal        mdec, rho, normxsp;
572   PetscReal        one = 1.0, zero = 0.0, ratio;
573   PetscBLASInt     blasm, blasn, blasncopy, blasnpmax;
574   static PetscBool set = PETSC_FALSE;
575 
576   /* n = # of parameters
577      m = dimension (components) of function  */
578   PetscFunctionBegin;
579   PetscCall(PetscCitationsRegister("@article{UNEDF0,\n"
580                                    "title = {Nuclear energy density optimization},\n"
581                                    "author = {Kortelainen, M.  and Lesinski, T.  and Mor\'e, J.  and Nazarewicz, W.\n"
582                                    "          and Sarich, J.  and Schunck, N.  and Stoitsov, M. V. and Wild, S. },\n"
583                                    "journal = {Phys. Rev. C},\n"
584                                    "volume = {82},\n"
585                                    "number = {2},\n"
586                                    "pages = {024313},\n"
587                                    "numpages = {18},\n"
588                                    "year = {2010},\n"
589                                    "month = {Aug},\n"
590                                    "doi = {10.1103/PhysRevC.82.024313}\n}\n",
591                                    &set));
592   tao->niter = 0;
593   if (tao->XL && tao->XU) {
594     /* Check x0 <= XU */
595     PetscReal val;
596 
597     PetscCall(VecCopy(tao->solution, mfqP->Xhist[0]));
598     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU));
599     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
600     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 > upper bound");
601 
602     /* Check x0 >= xl */
603     PetscCall(VecCopy(tao->XL, mfqP->Xhist[0]));
604     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->solution));
605     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
606     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 < lower bound");
607 
608     /* Check x0 + delta < XU  -- should be able to get around this eventually */
609 
610     PetscCall(VecSet(mfqP->Xhist[0], mfqP->delta));
611     PetscCall(VecAXPY(mfqP->Xhist[0], 1.0, tao->solution));
612     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU));
613     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
614     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 + delta > upper bound");
615   }
616 
617   blasm     = mfqP->m;
618   blasn     = mfqP->n;
619   blasnpmax = mfqP->npmax;
620   for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0;
621 
622   PetscCall(VecCopy(tao->solution, mfqP->Xhist[0]));
623 
624   /* This provides enough information to approximate the gradient of the objective */
625   /* using a forward difference scheme. */
626 
627   PetscCall(PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta));
628   PetscCall(pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0]));
629   mfqP->minindex = 0;
630   minnorm        = mfqP->Fres[0];
631 
632   PetscCall(VecGetOwnershipRange(mfqP->Xhist[0], &low, &high));
633   for (i = 1; i < mfqP->n + 1; ++i) {
634     PetscCall(VecCopy(mfqP->Xhist[0], mfqP->Xhist[i]));
635 
636     if (i - 1 >= low && i - 1 < high) {
637       PetscCall(VecGetArray(mfqP->Xhist[i], &x));
638       x[i - 1 - low] += mfqP->delta;
639       PetscCall(VecRestoreArray(mfqP->Xhist[i], &x));
640     }
641     CHKMEMQ;
642     PetscCall(pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i]));
643     if (mfqP->Fres[i] < minnorm) {
644       mfqP->minindex = i;
645       minnorm        = mfqP->Fres[i];
646     }
647   }
648   PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
649   PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res));
650   PetscCall(PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm));
651 
652   /* Gather mpi vecs to one big local vec */
653 
654   /* Begin serial code */
655 
656   /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
657   /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
658   /* (Column oriented for blas calls) */
659   ii = 0;
660 
661   PetscCall(PetscInfo(tao, "Build matrix: %" PetscInt_FMT "\n", (PetscInt)mfqP->size));
662   if (1 == mfqP->size) {
663     PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
664     for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
665     PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
666     PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
667     for (i = 0; i < mfqP->n + 1; i++) {
668       if (i == mfqP->minindex) continue;
669 
670       PetscCall(VecGetArray(mfqP->Xhist[i], &x));
671       for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
672       PetscCall(VecRestoreArray(mfqP->Xhist[i], &x));
673 
674       PetscCall(VecGetArray(mfqP->Fhist[i], &f));
675       for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
676       PetscCall(VecRestoreArray(mfqP->Fhist[i], &f));
677 
678       mfqP->model_indices[ii++] = i;
679     }
680     for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
681     PetscCall(VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
682   } else {
683     PetscCall(VecSet(mfqP->localxmin, 0));
684     PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD));
685     PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD));
686 
687     PetscCall(VecGetArrayRead(mfqP->localxmin, &xmint));
688     for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
689     PetscCall(VecRestoreArrayRead(mfqP->localxmin, &xmint));
690 
691     PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD));
692     PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD));
693     PetscCall(VecGetArrayRead(mfqP->localfmin, &fmin));
694     for (i = 0; i < mfqP->n + 1; i++) {
695       if (i == mfqP->minindex) continue;
696 
697       PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD));
698       PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD));
699       PetscCall(VecGetArray(mfqP->localx, &x));
700       for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
701       PetscCall(VecRestoreArray(mfqP->localx, &x));
702 
703       PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD));
704       PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD));
705       PetscCall(VecGetArray(mfqP->localf, &f));
706       for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
707       PetscCall(VecRestoreArray(mfqP->localf, &f));
708 
709       mfqP->model_indices[ii++] = i;
710     }
711     for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
712     PetscCall(VecRestoreArrayRead(mfqP->localfmin, &fmin));
713   }
714 
715   /* Determine the initial quadratic models */
716   /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */
717   /* D (nxn) Fdiff (nxm)  => G (nxm) */
718   blasncopy = blasn;
719   PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info));
720   PetscCall(PetscInfo(tao, "Linear solve return: %" PetscInt_FMT "\n", (PetscInt)info));
721 
722   PetscCall(pounders_update_res(tao));
723 
724   valid = PETSC_TRUE;
725 
726   PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
727   PetscCall(VecAssemblyBegin(tao->gradient));
728   PetscCall(VecAssemblyEnd(tao->gradient));
729   PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
730   gnorm *= mfqP->delta;
731   PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
732 
733   tao->reason = TAO_CONTINUE_ITERATING;
734   PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its));
735   PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step));
736   PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
737 
738   mfqP->nHist        = mfqP->n + 1;
739   mfqP->nmodelpoints = mfqP->n + 1;
740   PetscCall(PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm));
741 
742   while (tao->reason == TAO_CONTINUE_ITERATING) {
743     PetscReal gnm = 1e-4;
744     /* Call general purpose update function */
745     PetscTryTypeMethod(tao, update, tao->niter, tao->user_update);
746     tao->niter++;
747     /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */
748     PetscCall(gqtwrap(tao, &gnm, &mdec));
749     /* Evaluate the function at the new point */
750 
751     for (i = 0; i < mfqP->n; i++) mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i];
752     PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist]));
753     PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist]));
754     PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES));
755     PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]));
756     PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]));
757 
758     PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]));
759 
760     rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec;
761     mfqP->nHist++;
762 
763     /* Update the center */
764     if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) {
765       /* Update model to reflect new base point */
766       for (i = 0; i < mfqP->n; i++) mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta;
767       for (j = 0; j < mfqP->m; j++) {
768         /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work';
769          G(:,j) = G(:,j) + H(:,:,j)*work' */
770         for (k = 0; k < mfqP->n; k++) {
771           mfqP->work2[k] = 0.0;
772           for (l = 0; l < mfqP->n; l++) mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l];
773         }
774         for (i = 0; i < mfqP->n; i++) {
775           mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]);
776           mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i];
777         }
778       }
779       /* Cres += work*Gres + .5*work*Hres*work';
780        Gres += Hres*work'; */
781 
782       PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione));
783       for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] += mfqP->work2[i];
784       mfqP->minindex = mfqP->nHist - 1;
785       minnorm        = mfqP->Fres[mfqP->minindex];
786       PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res));
787       /* Change current center */
788       PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
789       for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
790       PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
791     }
792 
793     /* Evaluate at a model-improving point if necessary */
794     if (valid == PETSC_FALSE) {
795       mfqP->q_is_I       = 1;
796       mfqP->nmodelpoints = 0;
797       PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1));
798       if (mfqP->nmodelpoints < mfqP->n) {
799         PetscCall(PetscInfo(tao, "Model not valid -- model-improving\n"));
800         PetscCall(modelimprove(tao, mfqP, 1));
801       }
802     }
803 
804     /* Update the trust region radius */
805     deltaold = mfqP->delta;
806     normxsp  = 0;
807     for (i = 0; i < mfqP->n; i++) normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i];
808     normxsp = PetscSqrtReal(normxsp);
809     if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) {
810       mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax);
811     } else if (valid == PETSC_TRUE) {
812       mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin);
813     }
814 
815     /* Compute the next interpolation set */
816     mfqP->q_is_I       = 1;
817     mfqP->nmodelpoints = 0;
818     PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1));
819     PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1));
820     if (mfqP->nmodelpoints == mfqP->n) {
821       valid = PETSC_TRUE;
822     } else {
823       valid = PETSC_FALSE;
824       PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2));
825       PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c2));
826       if (mfqP->n > mfqP->nmodelpoints) {
827         PetscCall(PetscInfo(tao, "Model not valid -- adding geometry points\n"));
828         PetscCall(modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints));
829       }
830     }
831     for (i = mfqP->nmodelpoints; i > 0; i--) mfqP->model_indices[i] = mfqP->model_indices[i - 1];
832     mfqP->nmodelpoints++;
833     mfqP->model_indices[0] = mfqP->minindex;
834     PetscCall(morepoints(mfqP));
835     for (i = 0; i < mfqP->nmodelpoints; i++) {
836       PetscCall(VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x));
837       for (j = 0; j < mfqP->n; j++) mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold;
838       PetscCall(VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x));
839       PetscCall(VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f));
840       for (j = 0; j < mfqP->m; j++) {
841         for (k = 0; k < mfqP->n; k++) {
842           mfqP->work[k] = 0.0;
843           for (l = 0; l < mfqP->n; l++) mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l];
844         }
845         PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]);
846       }
847       PetscCall(VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f));
848     }
849 
850     /* Update the quadratic model */
851     PetscCall(PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints));
852     PetscCall(getquadpounders(mfqP));
853     PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
854     PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione));
855     /* G = G*(delta/deltaold) + Gdel */
856     ratio = mfqP->delta / deltaold;
857     iblas = blasm * blasn;
858     PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione));
859     PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione));
860     /* H = H*(delta/deltaold)^2 + Hdel */
861     iblas = blasm * blasn * blasn;
862     ratio *= ratio;
863     PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione));
864     PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione));
865 
866     /* Get residuals */
867     PetscCall(pounders_update_res(tao));
868 
869     /* Export solution and gradient residual to TAO */
870     PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
871     PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
872     PetscCall(VecAssemblyBegin(tao->gradient));
873     PetscCall(VecAssemblyEnd(tao->gradient));
874     PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
875     gnorm *= mfqP->delta;
876     /*  final criticality test */
877     PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its));
878     PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step));
879     PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
880     /* test for repeated model */
881     if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) {
882       same = PETSC_TRUE;
883     } else {
884       same = PETSC_FALSE;
885     }
886     for (i = 0; i < mfqP->nmodelpoints; i++) {
887       if (same) {
888         if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) {
889           same = PETSC_TRUE;
890         } else {
891           same = PETSC_FALSE;
892         }
893       }
894       mfqP->last_model_indices[i] = mfqP->model_indices[i];
895     }
896     mfqP->last_nmodelpoints = mfqP->nmodelpoints;
897     if (same && mfqP->delta == deltaold) {
898       PetscCall(PetscInfo(tao, "Identical model used in successive iterations\n"));
899       tao->reason = TAO_CONVERGED_STEPTOL;
900     }
901   }
902   PetscFunctionReturn(0);
903 }
904 
905 static PetscErrorCode TaoSetUp_POUNDERS(Tao tao)
906 {
907   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
908   PetscInt      i, j;
909   IS            isfloc, isfglob, isxloc, isxglob;
910 
911   PetscFunctionBegin;
912   if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
913   if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection));
914   PetscCall(VecGetSize(tao->solution, &mfqP->n));
915   PetscCall(VecGetSize(tao->ls_res, &mfqP->m));
916   mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n);
917   if (mfqP->npmax == PETSC_DEFAULT) mfqP->npmax = 2 * mfqP->n + 1;
918   mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax);
919   mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2);
920 
921   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist));
922   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist));
923   for (i = 0; i < mfqP->n + 1; i++) {
924     PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[i]));
925     PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[i]));
926   }
927   PetscCall(VecDuplicate(tao->solution, &mfqP->workxvec));
928   PetscCall(VecDuplicate(tao->ls_res, &mfqP->workfvec));
929   mfqP->nHist = 0;
930 
931   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres));
932   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES));
933   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work));
934   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work2));
935   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work3));
936   PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork));
937   PetscCall(PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega));
938   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta));
939   PetscCall(PetscMalloc1(mfqP->n + 1, &mfqP->alpha));
940 
941   PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H));
942   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q));
943   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp));
944   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L));
945   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp));
946   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save));
947   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N));
948   PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M));
949   PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z));
950   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau));
951   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp));
952   mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2);
953   PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork));
954   PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork));
955   PetscCall(PetscMalloc1(mfqP->n, &mfqP->xmin));
956   PetscCall(PetscMalloc1(mfqP->m, &mfqP->C));
957   PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff));
958   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp));
959   PetscCall(PetscMalloc1(mfqP->n, &mfqP->Gres));
960   PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres));
961   PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints));
962   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->model_indices));
963   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices));
964   PetscCall(PetscMalloc1(mfqP->n, &mfqP->Xsubproblem));
965   PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel));
966   PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel));
967   PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices));
968   PetscCall(PetscMalloc1(mfqP->n, &mfqP->iwork));
969   PetscCall(PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w));
970   for (i = 0; i < mfqP->m; i++) {
971     for (j = 0; j < mfqP->m; j++) {
972       if (i == j) {
973         mfqP->w[i + mfqP->m * j] = 1.0;
974       } else {
975         mfqP->w[i + mfqP->m * j] = 0.0;
976       }
977     }
978   }
979   for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) mfqP->indices[i] = i;
980   PetscCallMPI(MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size));
981   if (mfqP->size > 1) {
982     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx));
983     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin));
984     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf));
985     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin));
986     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc));
987     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob));
988     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc));
989     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob));
990 
991     PetscCall(VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx));
992     PetscCall(VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf));
993 
994     PetscCall(ISDestroy(&isxloc));
995     PetscCall(ISDestroy(&isxglob));
996     PetscCall(ISDestroy(&isfloc));
997     PetscCall(ISDestroy(&isfglob));
998   }
999 
1000   if (!mfqP->usegqt) {
1001     KSP ksp;
1002     PC  pc;
1003     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx));
1004     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl));
1005     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subb));
1006     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subxu));
1007     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subpdel));
1008     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subndel));
1009     PetscCall(TaoCreate(PETSC_COMM_SELF, &mfqP->subtao));
1010     PetscCall(PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1));
1011     PetscCall(TaoSetType(mfqP->subtao, TAOBNTR));
1012     PetscCall(TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_"));
1013     PetscCall(TaoSetSolution(mfqP->subtao, mfqP->subx));
1014     PetscCall(TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP));
1015     PetscCall(TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits));
1016     PetscCall(TaoSetFromOptions(mfqP->subtao));
1017     PetscCall(TaoGetKSP(mfqP->subtao, &ksp));
1018     if (ksp) {
1019       PetscCall(KSPGetPC(ksp, &pc));
1020       PetscCall(PCSetType(pc, PCNONE));
1021     }
1022     PetscCall(TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu));
1023     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH));
1024     PetscCall(TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP));
1025   }
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 static PetscErrorCode TaoDestroy_POUNDERS(Tao tao)
1030 {
1031   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1032   PetscInt      i;
1033 
1034   PetscFunctionBegin;
1035   if (!mfqP->usegqt) {
1036     PetscCall(TaoDestroy(&mfqP->subtao));
1037     PetscCall(VecDestroy(&mfqP->subx));
1038     PetscCall(VecDestroy(&mfqP->subxl));
1039     PetscCall(VecDestroy(&mfqP->subxu));
1040     PetscCall(VecDestroy(&mfqP->subb));
1041     PetscCall(VecDestroy(&mfqP->subpdel));
1042     PetscCall(VecDestroy(&mfqP->subndel));
1043     PetscCall(MatDestroy(&mfqP->subH));
1044   }
1045   PetscCall(PetscFree(mfqP->Fres));
1046   PetscCall(PetscFree(mfqP->RES));
1047   PetscCall(PetscFree(mfqP->work));
1048   PetscCall(PetscFree(mfqP->work2));
1049   PetscCall(PetscFree(mfqP->work3));
1050   PetscCall(PetscFree(mfqP->mwork));
1051   PetscCall(PetscFree(mfqP->omega));
1052   PetscCall(PetscFree(mfqP->beta));
1053   PetscCall(PetscFree(mfqP->alpha));
1054   PetscCall(PetscFree(mfqP->H));
1055   PetscCall(PetscFree(mfqP->Q));
1056   PetscCall(PetscFree(mfqP->Q_tmp));
1057   PetscCall(PetscFree(mfqP->L));
1058   PetscCall(PetscFree(mfqP->L_tmp));
1059   PetscCall(PetscFree(mfqP->L_save));
1060   PetscCall(PetscFree(mfqP->N));
1061   PetscCall(PetscFree(mfqP->M));
1062   PetscCall(PetscFree(mfqP->Z));
1063   PetscCall(PetscFree(mfqP->tau));
1064   PetscCall(PetscFree(mfqP->tau_tmp));
1065   PetscCall(PetscFree(mfqP->npmaxwork));
1066   PetscCall(PetscFree(mfqP->npmaxiwork));
1067   PetscCall(PetscFree(mfqP->xmin));
1068   PetscCall(PetscFree(mfqP->C));
1069   PetscCall(PetscFree(mfqP->Fdiff));
1070   PetscCall(PetscFree(mfqP->Disp));
1071   PetscCall(PetscFree(mfqP->Gres));
1072   PetscCall(PetscFree(mfqP->Hres));
1073   PetscCall(PetscFree(mfqP->Gpoints));
1074   PetscCall(PetscFree(mfqP->model_indices));
1075   PetscCall(PetscFree(mfqP->last_model_indices));
1076   PetscCall(PetscFree(mfqP->Xsubproblem));
1077   PetscCall(PetscFree(mfqP->Gdel));
1078   PetscCall(PetscFree(mfqP->Hdel));
1079   PetscCall(PetscFree(mfqP->indices));
1080   PetscCall(PetscFree(mfqP->iwork));
1081   PetscCall(PetscFree(mfqP->w));
1082   for (i = 0; i < mfqP->nHist; i++) {
1083     PetscCall(VecDestroy(&mfqP->Xhist[i]));
1084     PetscCall(VecDestroy(&mfqP->Fhist[i]));
1085   }
1086   PetscCall(VecDestroy(&mfqP->workxvec));
1087   PetscCall(VecDestroy(&mfqP->workfvec));
1088   PetscCall(PetscFree(mfqP->Xhist));
1089   PetscCall(PetscFree(mfqP->Fhist));
1090 
1091   if (mfqP->size > 1) {
1092     PetscCall(VecDestroy(&mfqP->localx));
1093     PetscCall(VecDestroy(&mfqP->localxmin));
1094     PetscCall(VecDestroy(&mfqP->localf));
1095     PetscCall(VecDestroy(&mfqP->localfmin));
1096   }
1097   PetscCall(PetscFree(tao->data));
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems *PetscOptionsObject)
1102 {
1103   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1104 
1105   PetscFunctionBegin;
1106   PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization");
1107   PetscCall(PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL));
1108   mfqP->delta = mfqP->delta0;
1109   PetscCall(PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL));
1110   PetscCall(PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL));
1111   PetscOptionsHeadEnd();
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer)
1116 {
1117   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1118   PetscBool     isascii;
1119 
1120   PetscFunctionBegin;
1121   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1122   if (isascii) {
1123     PetscCall(PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0));
1124     PetscCall(PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta));
1125     PetscCall(PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints));
1126     if (mfqP->usegqt) {
1127       PetscCall(PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n"));
1128     } else {
1129       PetscCall(TaoView(mfqP->subtao, viewer));
1130     }
1131   }
1132   PetscFunctionReturn(0);
1133 }
1134 /*MC
1135   TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares
1136 
1137   Options Database Keys:
1138 + -tao_pounders_delta - initial step length
1139 . -tao_pounders_npmax - maximum number of points in model
1140 - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON
1141 
1142   Level: beginner
1143 
1144 M*/
1145 
1146 PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao)
1147 {
1148   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1149 
1150   PetscFunctionBegin;
1151   tao->ops->setup          = TaoSetUp_POUNDERS;
1152   tao->ops->solve          = TaoSolve_POUNDERS;
1153   tao->ops->view           = TaoView_POUNDERS;
1154   tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS;
1155   tao->ops->destroy        = TaoDestroy_POUNDERS;
1156 
1157   PetscCall(PetscNew(&mfqP));
1158   tao->data = (void *)mfqP;
1159   /* Override default settings (unless already changed) */
1160   if (!tao->max_it_changed) tao->max_it = 2000;
1161   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
1162   mfqP->npmax      = PETSC_DEFAULT;
1163   mfqP->delta0     = 0.1;
1164   mfqP->delta      = 0.1;
1165   mfqP->deltamax   = 1e3;
1166   mfqP->deltamin   = 1e-6;
1167   mfqP->c2         = 10.0;
1168   mfqP->theta1     = 1e-5;
1169   mfqP->theta2     = 1e-4;
1170   mfqP->gamma0     = 0.5;
1171   mfqP->gamma1     = 2.0;
1172   mfqP->eta0       = 0.0;
1173   mfqP->eta1       = 0.1;
1174   mfqP->usegqt     = PETSC_FALSE;
1175   mfqP->gqt_rtol   = 0.001;
1176   mfqP->gqt_maxits = 50;
1177   mfqP->workxvec   = NULL;
1178   PetscFunctionReturn(0);
1179 }
1180