xref: /petsc/src/tao/leastsquares/impls/pounders/pounders.c (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 #include <../src/tao/leastsquares/impls/pounders/pounders.h>
2 
3 static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, void *ctx) {
4   PetscFunctionBegin;
5   PetscFunctionReturn(0);
6 }
7 
8 static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, void *ctx) {
9   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx;
10   PetscReal     d1, d2;
11 
12   PetscFunctionBegin;
13   /* g = A*x  (add b later)*/
14   PetscCall(MatMult(mfqP->subH, x, g));
15 
16   /* f = 1/2 * x'*(Ax) + b'*x  */
17   PetscCall(VecDot(x, g, &d1));
18   PetscCall(VecDot(mfqP->subb, x, &d2));
19   *f = 0.5 * d1 + d2;
20 
21   /* now  g = g + b */
22   PetscCall(VecAXPY(g, 1.0, mfqP->subb));
23   PetscFunctionReturn(0);
24 }
25 
26 static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum) {
27   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
28   PetscInt      i, row, col;
29   PetscReal     fr, fc;
30 
31   PetscFunctionBegin;
32   PetscCall(TaoComputeResidual(tao, x, F));
33   if (tao->res_weights_v) {
34     PetscCall(VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F));
35     PetscCall(VecDot(mfqP->workfvec, mfqP->workfvec, fsum));
36   } else if (tao->res_weights_w) {
37     *fsum = 0;
38     for (i = 0; i < tao->res_weights_n; i++) {
39       row = tao->res_weights_rows[i];
40       col = tao->res_weights_cols[i];
41       PetscCall(VecGetValues(F, 1, &row, &fr));
42       PetscCall(VecGetValues(F, 1, &col, &fc));
43       *fsum += tao->res_weights_w[i] * fc * fr;
44     }
45   } else {
46     PetscCall(VecDot(F, F, fsum));
47   }
48   PetscCall(PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum));
49   PetscCheck(!PetscIsInfOrNanReal(*fsum), PETSC_COMM_SELF, PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
50   PetscFunctionReturn(0);
51 }
52 
53 static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin) {
54 #if defined(PETSC_USE_REAL_SINGLE)
55   PetscReal atol = 1.0e-5;
56 #else
57   PetscReal atol = 1.0e-10;
58 #endif
59   PetscInt      info, its;
60   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
61 
62   PetscFunctionBegin;
63   if (!mfqP->usegqt) {
64     PetscReal maxval;
65     PetscInt  i, j;
66 
67     PetscCall(VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
68     PetscCall(VecAssemblyBegin(mfqP->subb));
69     PetscCall(VecAssemblyEnd(mfqP->subb));
70 
71     PetscCall(VecSet(mfqP->subx, 0.0));
72 
73     PetscCall(VecSet(mfqP->subndel, -1.0));
74     PetscCall(VecSet(mfqP->subpdel, +1.0));
75 
76     /* Complete the lower triangle of the Hessian matrix */
77     for (i = 0; i < mfqP->n; i++) {
78       for (j = i + 1; j < mfqP->n; j++) { mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i]; }
79     }
80     PetscCall(MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES));
81     PetscCall(MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY));
82     PetscCall(MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY));
83 
84     PetscCall(TaoResetStatistics(mfqP->subtao));
85     /* PetscCall(TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_DEFAULT)); */
86     /* enforce bound constraints -- experimental */
87     if (tao->XU && tao->XL) {
88       PetscCall(VecCopy(tao->XU, mfqP->subxu));
89       PetscCall(VecAXPY(mfqP->subxu, -1.0, tao->solution));
90       PetscCall(VecScale(mfqP->subxu, 1.0 / mfqP->delta));
91       PetscCall(VecCopy(tao->XL, mfqP->subxl));
92       PetscCall(VecAXPY(mfqP->subxl, -1.0, tao->solution));
93       PetscCall(VecScale(mfqP->subxl, 1.0 / mfqP->delta));
94 
95       PetscCall(VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel));
96       PetscCall(VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel));
97     } else {
98       PetscCall(VecCopy(mfqP->subpdel, mfqP->subxu));
99       PetscCall(VecCopy(mfqP->subndel, mfqP->subxl));
100     }
101     /* Make sure xu > xl */
102     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
103     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
104     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
105     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "upper bound < lower bound in subproblem");
106     /* Make sure xu > tao->solution > xl */
107     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
108     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx));
109     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
110     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess < lower bound in subproblem");
111 
112     PetscCall(VecCopy(mfqP->subx, mfqP->subpdel));
113     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
114     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
115     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess > upper bound in subproblem");
116 
117     PetscCall(TaoSolve(mfqP->subtao));
118     PetscCall(TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL));
119 
120     /* test bounds post-solution*/
121     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
122     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx));
123     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
124     if (maxval > 1e-5) {
125       PetscCall(PetscInfo(tao, "subproblem solution < lower bound\n"));
126       tao->reason = TAO_DIVERGED_TR_REDUCTION;
127     }
128 
129     PetscCall(VecCopy(mfqP->subx, mfqP->subpdel));
130     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
131     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
132     if (maxval > 1e-5) {
133       PetscCall(PetscInfo(tao, "subproblem solution > upper bound\n"));
134       tao->reason = TAO_DIVERGED_TR_REDUCTION;
135     }
136   } else {
137     gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3);
138   }
139   *qmin *= -1;
140   PetscFunctionReturn(0);
141 }
142 
143 static PetscErrorCode pounders_update_res(Tao tao) {
144   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
145   PetscInt      i, row, col;
146   PetscBLASInt  blasn = mfqP->n, blasn2 = blasn * blasn, blasm = mfqP->m, ione = 1;
147   PetscReal     zero = 0.0, one = 1.0, wii, factor;
148 
149   PetscFunctionBegin;
150   for (i = 0; i < mfqP->n; i++) { mfqP->Gres[i] = 0; }
151   for (i = 0; i < mfqP->n * mfqP->n; i++) { mfqP->Hres[i] = 0; }
152 
153   /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */
154   if (tao->res_weights_v) {
155     /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */
156     for (i = 0; i < mfqP->m; i++) {
157       PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &factor));
158       factor = factor * mfqP->C[i];
159       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione));
160     }
161 
162     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
163     /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/
164     for (i = 0; i < mfqP->m; i++) {
165       PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &wii));
166       if (tao->niter > 1) {
167         factor = wii * mfqP->C[i];
168         /* add wii * ci * Hi */
169         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
170       }
171       /* add wii * gi * gi' */
172       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn));
173     }
174   } else if (tao->res_weights_w) {
175     /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */
176     for (i = 0; i < tao->res_weights_n; i++) {
177       row = tao->res_weights_rows[i];
178       col = tao->res_weights_cols[i];
179 
180       factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
181       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione));
182       factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
183       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione));
184     }
185 
186     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
187     /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
188     for (i = 0; i < tao->res_weights_n; i++) {
189       row    = tao->res_weights_rows[i];
190       col    = tao->res_weights_cols[i];
191       factor = tao->res_weights_w[i] / 2.0;
192       /* add wij * gi gj' + wij * gj gi' */
193       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn));
194       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn));
195     }
196     if (tao->niter > 1) {
197       for (i = 0; i < tao->res_weights_n; i++) {
198         row = tao->res_weights_rows[i];
199         col = tao->res_weights_cols[i];
200 
201         /* add  wij*cj*Hi */
202         factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
203         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione));
204 
205         /* add wij*ci*Hj */
206         factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
207         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione));
208       }
209     }
210   } else {
211     /* Default: Gres= sum_i[cigi] = G*c' */
212     PetscCall(PetscInfo(tao, "Identity weights\n"));
213     PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione));
214 
215     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
216     /*  Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)}  */
217     PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn));
218 
219     /* sum(F(xkin,i)*H(:,:,i)) */
220     if (tao->niter > 1) {
221       for (i = 0; i < mfqP->m; i++) {
222         factor = mfqP->C[i];
223         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
224       }
225     }
226   }
227   PetscFunctionReturn(0);
228 }
229 
230 static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi) {
231   /* Phi = .5*[x(1)^2  sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */
232   PetscInt  i, j, k;
233   PetscReal sqrt2 = PetscSqrtReal(2.0);
234 
235   PetscFunctionBegin;
236   j = 0;
237   for (i = 0; i < n; i++) {
238     phi[j] = 0.5 * x[i] * x[i];
239     j++;
240     for (k = i + 1; k < n; k++) {
241       phi[j] = x[i] * x[k] / sqrt2;
242       j++;
243     }
244   }
245   PetscFunctionReturn(0);
246 }
247 
248 static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP) {
249   /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x'
250    that satisfies the interpolation conditions Q(X[:,j]) = f(j)
251    for j=1,...,m and with a Hessian matrix of least Frobenius norm */
252 
253   /* NB --we are ignoring c */
254   PetscInt     i, j, k, num, np = mfqP->nmodelpoints;
255   PetscReal    one = 1.0, zero = 0.0, negone = -1.0;
256   PetscBLASInt blasnpmax  = mfqP->npmax;
257   PetscBLASInt blasnplus1 = mfqP->n + 1;
258   PetscBLASInt blasnp     = np;
259   PetscBLASInt blasint    = mfqP->n * (mfqP->n + 1) / 2;
260   PetscBLASInt blasint2   = np - mfqP->n - 1;
261   PetscBLASInt info, ione = 1;
262   PetscReal    sqrt2 = PetscSqrtReal(2.0);
263 
264   PetscFunctionBegin;
265   for (i = 0; i < mfqP->n * mfqP->m; i++) { mfqP->Gdel[i] = 0; }
266   for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) { mfqP->Hdel[i] = 0; }
267 
268   /* factor M */
269   PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info));
270   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrf returned with value %" PetscBLASInt_FMT, info);
271 
272   if (np == mfqP->n + 1) {
273     for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) { mfqP->omega[i] = 0.0; }
274     for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) { mfqP->beta[i] = 0.0; }
275   } else {
276     /* Let Ltmp = (L'*L) */
277     PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint));
278 
279     /* factor Ltmp */
280     PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info));
281     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrf returned with value %" PetscBLASInt_FMT, info);
282   }
283 
284   for (k = 0; k < mfqP->m; k++) {
285     if (np != mfqP->n + 1) {
286       /* Solve L'*L*Omega = Z' * RESk*/
287       PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione));
288       PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info));
289       PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrs returned with value %" PetscBLASInt_FMT, info);
290 
291       /* Beta = L*Omega */
292       PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione));
293     }
294 
295     /* solve M'*Alpha = RESk - N'*Beta */
296     PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione));
297     PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info));
298     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrs returned with value %" PetscBLASInt_FMT, info);
299 
300     /* Gdel(:,k) = Alpha(2:n+1) */
301     for (i = 0; i < mfqP->n; i++) { mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1]; }
302 
303     /* Set Hdels */
304     num = 0;
305     for (i = 0; i < mfqP->n; i++) {
306       /* H[i,i,k] = Beta(num) */
307       mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num];
308       num++;
309       for (j = i + 1; j < mfqP->n; j++) {
310         /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */
311         mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
312         mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
313         num++;
314       }
315     }
316   }
317   PetscFunctionReturn(0);
318 }
319 
320 static PetscErrorCode morepoints(TAO_POUNDERS *mfqP) {
321   /* Assumes mfqP->model_indices[0]  is minimum index
322    Finishes adding points to mfqP->model_indices (up to npmax)
323    Computes L,Z,M,N
324    np is actual number of points in model (should equal npmax?) */
325   PetscInt         point, i, j, offset;
326   PetscInt         reject;
327   PetscBLASInt     blasn = mfqP->n, blasnpmax = mfqP->npmax, blasnplus1 = mfqP->n + 1, info, blasnmax = mfqP->nmax, blasint, blasint2, blasnp, blasmaxmn;
328   const PetscReal *x;
329   PetscReal        normd;
330 
331   PetscFunctionBegin;
332   /* Initialize M,N */
333   for (i = 0; i < mfqP->n + 1; i++) {
334     PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x));
335     mfqP->M[(mfqP->n + 1) * i] = 1.0;
336     for (j = 0; j < mfqP->n; j++) { mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; }
337     PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x));
338     PetscCall(phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i]));
339   }
340 
341   /* Now we add points until we have npmax starting with the most recent ones */
342   point              = mfqP->nHist - 1;
343   mfqP->nmodelpoints = mfqP->n + 1;
344   while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) {
345     /* Reject any points already in the model */
346     reject = 0;
347     for (j = 0; j < mfqP->n + 1; j++) {
348       if (point == mfqP->model_indices[j]) {
349         reject = 1;
350         break;
351       }
352     }
353 
354     /* Reject if norm(d) >c2 */
355     if (!reject) {
356       PetscCall(VecCopy(mfqP->Xhist[point], mfqP->workxvec));
357       PetscCall(VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex]));
358       PetscCall(VecNorm(mfqP->workxvec, NORM_2, &normd));
359       normd /= mfqP->delta;
360       if (normd > mfqP->c2) { reject = 1; }
361     }
362     if (reject) {
363       point--;
364       continue;
365     }
366 
367     PetscCall(VecGetArrayRead(mfqP->Xhist[point], &x));
368     mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0;
369     for (j = 0; j < mfqP->n; j++) { mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; }
370     PetscCall(VecRestoreArrayRead(mfqP->Xhist[point], &x));
371     PetscCall(phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)]));
372 
373     /* Update QR factorization */
374     /* Copy M' to Q_tmp */
375     for (i = 0; i < mfqP->n + 1; i++) {
376       for (j = 0; j < mfqP->npmax; j++) { mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j]; }
377     }
378     blasnp    = mfqP->nmodelpoints + 1;
379     /* Q_tmp,R = qr(M') */
380     blasmaxmn = PetscMax(mfqP->m, mfqP->n + 1);
381     PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info));
382     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine geqrf returned with value %" PetscBLASInt_FMT, info);
383 
384     /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */
385     /* L = N*Qtmp */
386     blasint2 = mfqP->n * (mfqP->n + 1) / 2;
387     /* Copy N to L_tmp */
388     for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) { mfqP->L_tmp[i] = mfqP->N[i]; }
389     /* Copy L_save to L_tmp */
390 
391     /* L_tmp = N*Qtmp' */
392     PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info));
393     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info);
394 
395     /* Copy L_tmp to L_save */
396     for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) { mfqP->L_save[i] = mfqP->L_tmp[i]; }
397 
398     /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */
399     blasint = mfqP->nmodelpoints - mfqP->n;
400     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
401     PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info));
402     PetscCall(PetscFPTrapPop());
403     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine gesvd returned with value %" PetscBLASInt_FMT, info);
404 
405     if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) {
406       /* accept point */
407       mfqP->model_indices[mfqP->nmodelpoints] = point;
408       /* Copy Q_tmp to Q */
409       for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) { mfqP->Q[i] = mfqP->Q_tmp[i]; }
410       for (i = 0; i < mfqP->npmax; i++) { mfqP->tau[i] = mfqP->tau_tmp[i]; }
411       mfqP->nmodelpoints++;
412       blasnp = mfqP->nmodelpoints;
413 
414       /* Copy L_save to L */
415       for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) { mfqP->L[i] = mfqP->L_save[i]; }
416     }
417     point--;
418   }
419 
420   blasnp = mfqP->nmodelpoints;
421   /* Copy Q(:,n+2:np) to Z */
422   /* First set Q_tmp to I */
423   for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) { mfqP->Q_tmp[i] = 0.0; }
424   for (i = 0; i < mfqP->npmax; i++) { mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0; }
425 
426   /* Q_tmp = I * Q */
427   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));
428   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info);
429 
430   /* Copy Q_tmp(:,n+2:np) to Z) */
431   offset = mfqP->npmax * (mfqP->n + 1);
432   for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) { mfqP->Z[i - offset] = mfqP->Q_tmp[i]; }
433 
434   if (mfqP->nmodelpoints == mfqP->n + 1) {
435     /* Set L to I_{n+1} */
436     for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) { mfqP->L[i] = 0.0; }
437     for (i = 0; i < mfqP->n; i++) { mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0; }
438   }
439   PetscFunctionReturn(0);
440 }
441 
442 /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */
443 static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index) {
444   PetscFunctionBegin;
445   /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/
446   PetscCall(VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist]));
447   PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES));
448   PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]));
449   PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]));
450   PetscCall(VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex]));
451 
452   /* Project into feasible region */
453   if (tao->XU && tao->XL) { PetscCall(VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist])); }
454 
455   /* Compute value of new vector */
456   PetscCall(VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist]));
457   CHKMEMQ;
458   PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]));
459 
460   /* Add new vector to model */
461   mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist;
462   mfqP->nmodelpoints++;
463   mfqP->nHist++;
464   PetscFunctionReturn(0);
465 }
466 
467 static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints) {
468   /* modeld = Q(:,np+1:n)' */
469   PetscInt     i, j, minindex = 0;
470   PetscReal    dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY;
471   PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blask, info;
472   PetscBLASInt blas1 = 1, blasnmax = mfqP->nmax;
473 
474   PetscFunctionBegin;
475   blask = mfqP->nmodelpoints;
476   /* Qtmp = I(n x n) */
477   for (i = 0; i < mfqP->n; i++) {
478     for (j = 0; j < mfqP->n; j++) { mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0; }
479   }
480   for (j = 0; j < mfqP->n; j++) { mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0; }
481 
482   /* Qtmp = Q * I */
483   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));
484 
485   for (i = mfqP->nmodelpoints; i < mfqP->n; i++) {
486     PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1));
487     if (dp > 0.0) { /* Model says use the other direction! */
488       for (j = 0; j < mfqP->n; j++) { mfqP->Q_tmp[i * mfqP->npmax + j] *= -1; }
489     }
490     /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */
491     for (j = 0; j < mfqP->n; j++) { mfqP->work2[j] = mfqP->Gres[j]; }
492     PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1));
493     PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1));
494     if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) {
495       minindex = i;
496       minvalue = mfqP->work[i];
497     }
498     if (addallpoints != 0) { PetscCall(addpoint(tao, mfqP, i)); }
499   }
500   if (!addallpoints) { PetscCall(addpoint(tao, mfqP, minindex)); }
501   PetscFunctionReturn(0);
502 }
503 
504 static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c) {
505   PetscInt         i, j;
506   PetscBLASInt     blasm = mfqP->m, blasj, blask, blasn = mfqP->n, ione = 1, info;
507   PetscBLASInt     blasnpmax = mfqP->npmax, blasmaxmn;
508   PetscReal        proj, normd;
509   const PetscReal *x;
510 
511   PetscFunctionBegin;
512   for (i = mfqP->nHist - 1; i >= 0; i--) {
513     PetscCall(VecGetArrayRead(mfqP->Xhist[i], &x));
514     for (j = 0; j < mfqP->n; j++) { mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta; }
515     PetscCall(VecRestoreArrayRead(mfqP->Xhist[i], &x));
516     PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione));
517     PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione));
518     if (normd <= c) {
519       blasj = PetscMax((mfqP->n - mfqP->nmodelpoints), 0);
520       if (!mfqP->q_is_I) {
521         /* project D onto null */
522         blask = (mfqP->nmodelpoints);
523         PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info));
524         PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "ormqr returned value %" PetscBLASInt_FMT, info);
525       }
526       PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione));
527 
528       if (proj >= mfqP->theta1) { /* add this index to model */
529         mfqP->model_indices[mfqP->nmodelpoints] = i;
530         mfqP->nmodelpoints++;
531         PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione));
532         blask = mfqP->npmax * (mfqP->nmodelpoints);
533         PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione));
534         blask     = mfqP->nmodelpoints;
535         blasmaxmn = PetscMax(mfqP->m, mfqP->n);
536         PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info));
537         PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "geqrf returned value %" PetscBLASInt_FMT, info);
538         mfqP->q_is_I = 0;
539       }
540       if (mfqP->nmodelpoints == mfqP->n) { break; }
541     }
542   }
543 
544   PetscFunctionReturn(0);
545 }
546 
547 static PetscErrorCode TaoSolve_POUNDERS(Tao tao) {
548   TAO_POUNDERS    *mfqP = (TAO_POUNDERS *)tao->data;
549   PetscInt         i, ii, j, k, l;
550   PetscReal        step = 1.0;
551   PetscInt         low, high;
552   PetscReal        minnorm;
553   PetscReal       *x, *f;
554   const PetscReal *xmint, *fmin;
555   PetscReal        deltaold;
556   PetscReal        gnorm;
557   PetscBLASInt     info, ione = 1, iblas;
558   PetscBool        valid, same;
559   PetscReal        mdec, rho, normxsp;
560   PetscReal        one = 1.0, zero = 0.0, ratio;
561   PetscBLASInt     blasm, blasn, blasncopy, blasnpmax;
562   static PetscBool set = PETSC_FALSE;
563 
564   /* n = # of parameters
565      m = dimension (components) of function  */
566   PetscFunctionBegin;
567   PetscCall(PetscCitationsRegister("@article{UNEDF0,\n"
568                                    "title = {Nuclear energy density optimization},\n"
569                                    "author = {Kortelainen, M.  and Lesinski, T.  and Mor\'e, J.  and Nazarewicz, W.\n"
570                                    "          and Sarich, J.  and Schunck, N.  and Stoitsov, M. V. and Wild, S. },\n"
571                                    "journal = {Phys. Rev. C},\n"
572                                    "volume = {82},\n"
573                                    "number = {2},\n"
574                                    "pages = {024313},\n"
575                                    "numpages = {18},\n"
576                                    "year = {2010},\n"
577                                    "month = {Aug},\n"
578                                    "doi = {10.1103/PhysRevC.82.024313}\n}\n",
579                                    &set));
580   tao->niter = 0;
581   if (tao->XL && tao->XU) {
582     /* Check x0 <= XU */
583     PetscReal val;
584 
585     PetscCall(VecCopy(tao->solution, mfqP->Xhist[0]));
586     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU));
587     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
588     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 > upper bound");
589 
590     /* Check x0 >= xl */
591     PetscCall(VecCopy(tao->XL, mfqP->Xhist[0]));
592     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->solution));
593     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
594     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 < lower bound");
595 
596     /* Check x0 + delta < XU  -- should be able to get around this eventually */
597 
598     PetscCall(VecSet(mfqP->Xhist[0], mfqP->delta));
599     PetscCall(VecAXPY(mfqP->Xhist[0], 1.0, tao->solution));
600     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU));
601     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
602     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 + delta > upper bound");
603   }
604 
605   blasm     = mfqP->m;
606   blasn     = mfqP->n;
607   blasnpmax = mfqP->npmax;
608   for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0;
609 
610   PetscCall(VecCopy(tao->solution, mfqP->Xhist[0]));
611 
612   /* This provides enough information to approximate the gradient of the objective */
613   /* using a forward difference scheme. */
614 
615   PetscCall(PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta));
616   PetscCall(pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0]));
617   mfqP->minindex = 0;
618   minnorm        = mfqP->Fres[0];
619 
620   PetscCall(VecGetOwnershipRange(mfqP->Xhist[0], &low, &high));
621   for (i = 1; i < mfqP->n + 1; ++i) {
622     PetscCall(VecCopy(mfqP->Xhist[0], mfqP->Xhist[i]));
623 
624     if (i - 1 >= low && i - 1 < high) {
625       PetscCall(VecGetArray(mfqP->Xhist[i], &x));
626       x[i - 1 - low] += mfqP->delta;
627       PetscCall(VecRestoreArray(mfqP->Xhist[i], &x));
628     }
629     CHKMEMQ;
630     PetscCall(pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i]));
631     if (mfqP->Fres[i] < minnorm) {
632       mfqP->minindex = i;
633       minnorm        = mfqP->Fres[i];
634     }
635   }
636   PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
637   PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res));
638   PetscCall(PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm));
639 
640   /* Gather mpi vecs to one big local vec */
641 
642   /* Begin serial code */
643 
644   /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
645   /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
646   /* (Column oriented for blas calls) */
647   ii = 0;
648 
649   PetscCall(PetscInfo(tao, "Build matrix: %" PetscInt_FMT "\n", (PetscInt)mfqP->size));
650   if (1 == mfqP->size) {
651     PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
652     for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
653     PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
654     PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
655     for (i = 0; i < mfqP->n + 1; i++) {
656       if (i == mfqP->minindex) continue;
657 
658       PetscCall(VecGetArray(mfqP->Xhist[i], &x));
659       for (j = 0; j < mfqP->n; j++) { mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; }
660       PetscCall(VecRestoreArray(mfqP->Xhist[i], &x));
661 
662       PetscCall(VecGetArray(mfqP->Fhist[i], &f));
663       for (j = 0; j < mfqP->m; j++) { mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; }
664       PetscCall(VecRestoreArray(mfqP->Fhist[i], &f));
665 
666       mfqP->model_indices[ii++] = i;
667     }
668     for (j = 0; j < mfqP->m; j++) { mfqP->C[j] = fmin[j]; }
669     PetscCall(VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
670   } else {
671     PetscCall(VecSet(mfqP->localxmin, 0));
672     PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD));
673     PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD));
674 
675     PetscCall(VecGetArrayRead(mfqP->localxmin, &xmint));
676     for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
677     PetscCall(VecRestoreArrayRead(mfqP->localxmin, &xmint));
678 
679     PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD));
680     PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD));
681     PetscCall(VecGetArrayRead(mfqP->localfmin, &fmin));
682     for (i = 0; i < mfqP->n + 1; i++) {
683       if (i == mfqP->minindex) continue;
684 
685       PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD));
686       PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD));
687       PetscCall(VecGetArray(mfqP->localx, &x));
688       for (j = 0; j < mfqP->n; j++) { mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; }
689       PetscCall(VecRestoreArray(mfqP->localx, &x));
690 
691       PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD));
692       PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD));
693       PetscCall(VecGetArray(mfqP->localf, &f));
694       for (j = 0; j < mfqP->m; j++) { mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; }
695       PetscCall(VecRestoreArray(mfqP->localf, &f));
696 
697       mfqP->model_indices[ii++] = i;
698     }
699     for (j = 0; j < mfqP->m; j++) { mfqP->C[j] = fmin[j]; }
700     PetscCall(VecRestoreArrayRead(mfqP->localfmin, &fmin));
701   }
702 
703   /* Determine the initial quadratic models */
704   /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */
705   /* D (nxn) Fdiff (nxm)  => G (nxm) */
706   blasncopy = blasn;
707   PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info));
708   PetscCall(PetscInfo(tao, "Linear solve return: %" PetscInt_FMT "\n", (PetscInt)info));
709 
710   PetscCall(pounders_update_res(tao));
711 
712   valid = PETSC_TRUE;
713 
714   PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
715   PetscCall(VecAssemblyBegin(tao->gradient));
716   PetscCall(VecAssemblyEnd(tao->gradient));
717   PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
718   gnorm *= mfqP->delta;
719   PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
720 
721   tao->reason = TAO_CONTINUE_ITERATING;
722   PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its));
723   PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step));
724   PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
725 
726   mfqP->nHist        = mfqP->n + 1;
727   mfqP->nmodelpoints = mfqP->n + 1;
728   PetscCall(PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm));
729 
730   while (tao->reason == TAO_CONTINUE_ITERATING) {
731     PetscReal gnm = 1e-4;
732     /* Call general purpose update function */
733     PetscTryTypeMethod(tao, update, tao->niter, tao->user_update);
734     tao->niter++;
735     /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */
736     PetscCall(gqtwrap(tao, &gnm, &mdec));
737     /* Evaluate the function at the new point */
738 
739     for (i = 0; i < mfqP->n; i++) { mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i]; }
740     PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist]));
741     PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist]));
742     PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES));
743     PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]));
744     PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]));
745 
746     PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]));
747 
748     rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec;
749     mfqP->nHist++;
750 
751     /* Update the center */
752     if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) {
753       /* Update model to reflect new base point */
754       for (i = 0; i < mfqP->n; i++) { mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta; }
755       for (j = 0; j < mfqP->m; j++) {
756         /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work';
757          G(:,j) = G(:,j) + H(:,:,j)*work' */
758         for (k = 0; k < mfqP->n; k++) {
759           mfqP->work2[k] = 0.0;
760           for (l = 0; l < mfqP->n; l++) { mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l]; }
761         }
762         for (i = 0; i < mfqP->n; i++) {
763           mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]);
764           mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i];
765         }
766       }
767       /* Cres += work*Gres + .5*work*Hres*work';
768        Gres += Hres*work'; */
769 
770       PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione));
771       for (i = 0; i < mfqP->n; i++) { mfqP->Gres[i] += mfqP->work2[i]; }
772       mfqP->minindex = mfqP->nHist - 1;
773       minnorm        = mfqP->Fres[mfqP->minindex];
774       PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res));
775       /* Change current center */
776       PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
777       for (i = 0; i < mfqP->n; i++) { mfqP->xmin[i] = xmint[i]; }
778       PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
779     }
780 
781     /* Evaluate at a model-improving point if necessary */
782     if (valid == PETSC_FALSE) {
783       mfqP->q_is_I       = 1;
784       mfqP->nmodelpoints = 0;
785       PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1));
786       if (mfqP->nmodelpoints < mfqP->n) {
787         PetscCall(PetscInfo(tao, "Model not valid -- model-improving\n"));
788         PetscCall(modelimprove(tao, mfqP, 1));
789       }
790     }
791 
792     /* Update the trust region radius */
793     deltaold = mfqP->delta;
794     normxsp  = 0;
795     for (i = 0; i < mfqP->n; i++) { normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i]; }
796     normxsp = PetscSqrtReal(normxsp);
797     if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) {
798       mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax);
799     } else if (valid == PETSC_TRUE) {
800       mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin);
801     }
802 
803     /* Compute the next interpolation set */
804     mfqP->q_is_I       = 1;
805     mfqP->nmodelpoints = 0;
806     PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1));
807     PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1));
808     if (mfqP->nmodelpoints == mfqP->n) {
809       valid = PETSC_TRUE;
810     } else {
811       valid = PETSC_FALSE;
812       PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2));
813       PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c2));
814       if (mfqP->n > mfqP->nmodelpoints) {
815         PetscCall(PetscInfo(tao, "Model not valid -- adding geometry points\n"));
816         PetscCall(modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints));
817       }
818     }
819     for (i = mfqP->nmodelpoints; i > 0; i--) { mfqP->model_indices[i] = mfqP->model_indices[i - 1]; }
820     mfqP->nmodelpoints++;
821     mfqP->model_indices[0] = mfqP->minindex;
822     PetscCall(morepoints(mfqP));
823     for (i = 0; i < mfqP->nmodelpoints; i++) {
824       PetscCall(VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x));
825       for (j = 0; j < mfqP->n; j++) { mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold; }
826       PetscCall(VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x));
827       PetscCall(VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f));
828       for (j = 0; j < mfqP->m; j++) {
829         for (k = 0; k < mfqP->n; k++) {
830           mfqP->work[k] = 0.0;
831           for (l = 0; l < mfqP->n; l++) { mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l]; }
832         }
833         PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]);
834       }
835       PetscCall(VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f));
836     }
837 
838     /* Update the quadratic model */
839     PetscCall(PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints));
840     PetscCall(getquadpounders(mfqP));
841     PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
842     PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione));
843     /* G = G*(delta/deltaold) + Gdel */
844     ratio = mfqP->delta / deltaold;
845     iblas = blasm * blasn;
846     PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione));
847     PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione));
848     /* H = H*(delta/deltaold)^2 + Hdel */
849     iblas = blasm * blasn * blasn;
850     ratio *= ratio;
851     PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione));
852     PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione));
853 
854     /* Get residuals */
855     PetscCall(pounders_update_res(tao));
856 
857     /* Export solution and gradient residual to TAO */
858     PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
859     PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
860     PetscCall(VecAssemblyBegin(tao->gradient));
861     PetscCall(VecAssemblyEnd(tao->gradient));
862     PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
863     gnorm *= mfqP->delta;
864     /*  final criticality test */
865     PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its));
866     PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step));
867     PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
868     /* test for repeated model */
869     if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) {
870       same = PETSC_TRUE;
871     } else {
872       same = PETSC_FALSE;
873     }
874     for (i = 0; i < mfqP->nmodelpoints; i++) {
875       if (same) {
876         if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) {
877           same = PETSC_TRUE;
878         } else {
879           same = PETSC_FALSE;
880         }
881       }
882       mfqP->last_model_indices[i] = mfqP->model_indices[i];
883     }
884     mfqP->last_nmodelpoints = mfqP->nmodelpoints;
885     if (same && mfqP->delta == deltaold) {
886       PetscCall(PetscInfo(tao, "Identical model used in successive iterations\n"));
887       tao->reason = TAO_CONVERGED_STEPTOL;
888     }
889   }
890   PetscFunctionReturn(0);
891 }
892 
893 static PetscErrorCode TaoSetUp_POUNDERS(Tao tao) {
894   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
895   PetscInt      i, j;
896   IS            isfloc, isfglob, isxloc, isxglob;
897 
898   PetscFunctionBegin;
899   if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
900   if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection));
901   PetscCall(VecGetSize(tao->solution, &mfqP->n));
902   PetscCall(VecGetSize(tao->ls_res, &mfqP->m));
903   mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n);
904   if (mfqP->npmax == PETSC_DEFAULT) { mfqP->npmax = 2 * mfqP->n + 1; }
905   mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax);
906   mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2);
907 
908   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist));
909   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist));
910   for (i = 0; i < mfqP->n + 1; i++) {
911     PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[i]));
912     PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[i]));
913   }
914   PetscCall(VecDuplicate(tao->solution, &mfqP->workxvec));
915   PetscCall(VecDuplicate(tao->ls_res, &mfqP->workfvec));
916   mfqP->nHist = 0;
917 
918   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres));
919   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES));
920   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work));
921   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work2));
922   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work3));
923   PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork));
924   PetscCall(PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega));
925   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta));
926   PetscCall(PetscMalloc1(mfqP->n + 1, &mfqP->alpha));
927 
928   PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H));
929   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q));
930   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp));
931   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L));
932   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp));
933   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save));
934   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N));
935   PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M));
936   PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z));
937   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau));
938   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp));
939   mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2);
940   PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork));
941   PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork));
942   PetscCall(PetscMalloc1(mfqP->n, &mfqP->xmin));
943   PetscCall(PetscMalloc1(mfqP->m, &mfqP->C));
944   PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff));
945   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp));
946   PetscCall(PetscMalloc1(mfqP->n, &mfqP->Gres));
947   PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres));
948   PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints));
949   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->model_indices));
950   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices));
951   PetscCall(PetscMalloc1(mfqP->n, &mfqP->Xsubproblem));
952   PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel));
953   PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel));
954   PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices));
955   PetscCall(PetscMalloc1(mfqP->n, &mfqP->iwork));
956   PetscCall(PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w));
957   for (i = 0; i < mfqP->m; i++) {
958     for (j = 0; j < mfqP->m; j++) {
959       if (i == j) {
960         mfqP->w[i + mfqP->m * j] = 1.0;
961       } else {
962         mfqP->w[i + mfqP->m * j] = 0.0;
963       }
964     }
965   }
966   for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) { mfqP->indices[i] = i; }
967   PetscCallMPI(MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size));
968   if (mfqP->size > 1) {
969     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx));
970     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin));
971     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf));
972     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin));
973     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc));
974     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob));
975     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc));
976     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob));
977 
978     PetscCall(VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx));
979     PetscCall(VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf));
980 
981     PetscCall(ISDestroy(&isxloc));
982     PetscCall(ISDestroy(&isxglob));
983     PetscCall(ISDestroy(&isfloc));
984     PetscCall(ISDestroy(&isfglob));
985   }
986 
987   if (!mfqP->usegqt) {
988     KSP ksp;
989     PC  pc;
990     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx));
991     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl));
992     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subb));
993     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subxu));
994     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subpdel));
995     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subndel));
996     PetscCall(TaoCreate(PETSC_COMM_SELF, &mfqP->subtao));
997     PetscCall(PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1));
998     PetscCall(TaoSetType(mfqP->subtao, TAOBNTR));
999     PetscCall(TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_"));
1000     PetscCall(TaoSetSolution(mfqP->subtao, mfqP->subx));
1001     PetscCall(TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP));
1002     PetscCall(TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits));
1003     PetscCall(TaoSetFromOptions(mfqP->subtao));
1004     PetscCall(TaoGetKSP(mfqP->subtao, &ksp));
1005     if (ksp) {
1006       PetscCall(KSPGetPC(ksp, &pc));
1007       PetscCall(PCSetType(pc, PCNONE));
1008     }
1009     PetscCall(TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu));
1010     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH));
1011     PetscCall(TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP));
1012   }
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 static PetscErrorCode TaoDestroy_POUNDERS(Tao tao) {
1017   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1018   PetscInt      i;
1019 
1020   PetscFunctionBegin;
1021   if (!mfqP->usegqt) {
1022     PetscCall(TaoDestroy(&mfqP->subtao));
1023     PetscCall(VecDestroy(&mfqP->subx));
1024     PetscCall(VecDestroy(&mfqP->subxl));
1025     PetscCall(VecDestroy(&mfqP->subxu));
1026     PetscCall(VecDestroy(&mfqP->subb));
1027     PetscCall(VecDestroy(&mfqP->subpdel));
1028     PetscCall(VecDestroy(&mfqP->subndel));
1029     PetscCall(MatDestroy(&mfqP->subH));
1030   }
1031   PetscCall(PetscFree(mfqP->Fres));
1032   PetscCall(PetscFree(mfqP->RES));
1033   PetscCall(PetscFree(mfqP->work));
1034   PetscCall(PetscFree(mfqP->work2));
1035   PetscCall(PetscFree(mfqP->work3));
1036   PetscCall(PetscFree(mfqP->mwork));
1037   PetscCall(PetscFree(mfqP->omega));
1038   PetscCall(PetscFree(mfqP->beta));
1039   PetscCall(PetscFree(mfqP->alpha));
1040   PetscCall(PetscFree(mfqP->H));
1041   PetscCall(PetscFree(mfqP->Q));
1042   PetscCall(PetscFree(mfqP->Q_tmp));
1043   PetscCall(PetscFree(mfqP->L));
1044   PetscCall(PetscFree(mfqP->L_tmp));
1045   PetscCall(PetscFree(mfqP->L_save));
1046   PetscCall(PetscFree(mfqP->N));
1047   PetscCall(PetscFree(mfqP->M));
1048   PetscCall(PetscFree(mfqP->Z));
1049   PetscCall(PetscFree(mfqP->tau));
1050   PetscCall(PetscFree(mfqP->tau_tmp));
1051   PetscCall(PetscFree(mfqP->npmaxwork));
1052   PetscCall(PetscFree(mfqP->npmaxiwork));
1053   PetscCall(PetscFree(mfqP->xmin));
1054   PetscCall(PetscFree(mfqP->C));
1055   PetscCall(PetscFree(mfqP->Fdiff));
1056   PetscCall(PetscFree(mfqP->Disp));
1057   PetscCall(PetscFree(mfqP->Gres));
1058   PetscCall(PetscFree(mfqP->Hres));
1059   PetscCall(PetscFree(mfqP->Gpoints));
1060   PetscCall(PetscFree(mfqP->model_indices));
1061   PetscCall(PetscFree(mfqP->last_model_indices));
1062   PetscCall(PetscFree(mfqP->Xsubproblem));
1063   PetscCall(PetscFree(mfqP->Gdel));
1064   PetscCall(PetscFree(mfqP->Hdel));
1065   PetscCall(PetscFree(mfqP->indices));
1066   PetscCall(PetscFree(mfqP->iwork));
1067   PetscCall(PetscFree(mfqP->w));
1068   for (i = 0; i < mfqP->nHist; i++) {
1069     PetscCall(VecDestroy(&mfqP->Xhist[i]));
1070     PetscCall(VecDestroy(&mfqP->Fhist[i]));
1071   }
1072   PetscCall(VecDestroy(&mfqP->workxvec));
1073   PetscCall(VecDestroy(&mfqP->workfvec));
1074   PetscCall(PetscFree(mfqP->Xhist));
1075   PetscCall(PetscFree(mfqP->Fhist));
1076 
1077   if (mfqP->size > 1) {
1078     PetscCall(VecDestroy(&mfqP->localx));
1079     PetscCall(VecDestroy(&mfqP->localxmin));
1080     PetscCall(VecDestroy(&mfqP->localf));
1081     PetscCall(VecDestroy(&mfqP->localfmin));
1082   }
1083   PetscCall(PetscFree(tao->data));
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems *PetscOptionsObject) {
1088   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1089 
1090   PetscFunctionBegin;
1091   PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization");
1092   PetscCall(PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL));
1093   mfqP->delta = mfqP->delta0;
1094   PetscCall(PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL));
1095   PetscCall(PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL));
1096   PetscOptionsHeadEnd();
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer) {
1101   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1102   PetscBool     isascii;
1103 
1104   PetscFunctionBegin;
1105   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1106   if (isascii) {
1107     PetscCall(PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0));
1108     PetscCall(PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta));
1109     PetscCall(PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints));
1110     if (mfqP->usegqt) {
1111       PetscCall(PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n"));
1112     } else {
1113       PetscCall(TaoView(mfqP->subtao, viewer));
1114     }
1115   }
1116   PetscFunctionReturn(0);
1117 }
1118 /*MC
1119   TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares
1120 
1121   Options Database Keys:
1122 + -tao_pounders_delta - initial step length
1123 . -tao_pounders_npmax - maximum number of points in model
1124 - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON
1125 
1126   Level: beginner
1127 
1128 M*/
1129 
1130 PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao) {
1131   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1132 
1133   PetscFunctionBegin;
1134   tao->ops->setup          = TaoSetUp_POUNDERS;
1135   tao->ops->solve          = TaoSolve_POUNDERS;
1136   tao->ops->view           = TaoView_POUNDERS;
1137   tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS;
1138   tao->ops->destroy        = TaoDestroy_POUNDERS;
1139 
1140   PetscCall(PetscNewLog(tao, &mfqP));
1141   tao->data = (void *)mfqP;
1142   /* Override default settings (unless already changed) */
1143   if (!tao->max_it_changed) tao->max_it = 2000;
1144   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
1145   mfqP->npmax      = PETSC_DEFAULT;
1146   mfqP->delta0     = 0.1;
1147   mfqP->delta      = 0.1;
1148   mfqP->deltamax   = 1e3;
1149   mfqP->deltamin   = 1e-6;
1150   mfqP->c2         = 10.0;
1151   mfqP->theta1     = 1e-5;
1152   mfqP->theta2     = 1e-4;
1153   mfqP->gamma0     = 0.5;
1154   mfqP->gamma1     = 2.0;
1155   mfqP->eta0       = 0.0;
1156   mfqP->eta1       = 0.1;
1157   mfqP->usegqt     = PETSC_FALSE;
1158   mfqP->gqt_rtol   = 0.001;
1159   mfqP->gqt_maxits = 50;
1160   mfqP->workxvec   = NULL;
1161   PetscFunctionReturn(0);
1162 }
1163