xref: /petsc/src/tao/leastsquares/impls/pounders/pounders.c (revision 4e8208cbcbc709572b8abe32f33c78b69c819375)
1aaa7dc30SBarry Smith #include <../src/tao/leastsquares/impls/pounders/pounders.h>
2a7e14dcfSSatish Balay 
3*2a8381b2SBarry Smith static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, PetscCtx ctx)
4d71ae5a4SJacob Faibussowitsch {
5a7e14dcfSSatish Balay   PetscFunctionBegin;
63ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
7a7e14dcfSSatish Balay }
8ca88a2e0STodd Munson 
9*2a8381b2SBarry Smith static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, PetscCtx ctx)
10d71ae5a4SJacob Faibussowitsch {
11a7e14dcfSSatish Balay   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx;
12a7e14dcfSSatish Balay   PetscReal     d1, d2;
13f06e3bfaSBarry Smith 
14a7e14dcfSSatish Balay   PetscFunctionBegin;
15a7e14dcfSSatish Balay   /* g = A*x  (add b later)*/
169566063dSJacob Faibussowitsch   PetscCall(MatMult(mfqP->subH, x, g));
17a7e14dcfSSatish Balay 
18a7e14dcfSSatish Balay   /* f = 1/2 * x'*(Ax) + b'*x  */
199566063dSJacob Faibussowitsch   PetscCall(VecDot(x, g, &d1));
209566063dSJacob Faibussowitsch   PetscCall(VecDot(mfqP->subb, x, &d2));
21a7e14dcfSSatish Balay   *f = 0.5 * d1 + d2;
22a7e14dcfSSatish Balay 
23a7e14dcfSSatish Balay   /* now  g = g + b */
249566063dSJacob Faibussowitsch   PetscCall(VecAXPY(g, 1.0, mfqP->subb));
253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
26a7e14dcfSSatish Balay }
27ca88a2e0STodd Munson 
28d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum)
29d71ae5a4SJacob Faibussowitsch {
30125ddc8aSJason Sarich   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
31125ddc8aSJason Sarich   PetscInt      i, row, col;
32125ddc8aSJason Sarich   PetscReal     fr, fc;
33691b26d3SBarry Smith 
34125ddc8aSJason Sarich   PetscFunctionBegin;
359566063dSJacob Faibussowitsch   PetscCall(TaoComputeResidual(tao, x, F));
36737f463aSAlp Dener   if (tao->res_weights_v) {
379566063dSJacob Faibussowitsch     PetscCall(VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F));
389566063dSJacob Faibussowitsch     PetscCall(VecDot(mfqP->workfvec, mfqP->workfvec, fsum));
39737f463aSAlp Dener   } else if (tao->res_weights_w) {
40125ddc8aSJason Sarich     *fsum = 0;
41737f463aSAlp Dener     for (i = 0; i < tao->res_weights_n; i++) {
42737f463aSAlp Dener       row = tao->res_weights_rows[i];
43737f463aSAlp Dener       col = tao->res_weights_cols[i];
449566063dSJacob Faibussowitsch       PetscCall(VecGetValues(F, 1, &row, &fr));
459566063dSJacob Faibussowitsch       PetscCall(VecGetValues(F, 1, &col, &fc));
46737f463aSAlp Dener       *fsum += tao->res_weights_w[i] * fc * fr;
47125ddc8aSJason Sarich     }
48125ddc8aSJason Sarich   } else {
499566063dSJacob Faibussowitsch     PetscCall(VecDot(F, F, fsum));
50125ddc8aSJason Sarich   }
519566063dSJacob Faibussowitsch   PetscCall(PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum));
5276c63389SBarry Smith   PetscCheck(!PetscIsInfOrNanReal(*fsum), PETSC_COMM_SELF, PETSC_ERR_USER, "User provided compute function generated infinity or NaN");
533ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
54125ddc8aSJason Sarich }
55f06e3bfaSBarry Smith 
56d71ae5a4SJacob Faibussowitsch static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin)
57d71ae5a4SJacob Faibussowitsch {
586f4723b1SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
596f4723b1SBarry Smith   PetscReal atol = 1.0e-5;
606f4723b1SBarry Smith #else
61a7e14dcfSSatish Balay   PetscReal atol = 1.0e-10;
626f4723b1SBarry Smith #endif
63a7e14dcfSSatish Balay   PetscInt      info, its;
64a7e14dcfSSatish Balay   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
65a7e14dcfSSatish Balay 
66a7e14dcfSSatish Balay   PetscFunctionBegin;
67a7e14dcfSSatish Balay   if (!mfqP->usegqt) {
68a7e14dcfSSatish Balay     PetscReal maxval;
69a7e14dcfSSatish Balay     PetscInt  i, j;
70a7e14dcfSSatish Balay 
719566063dSJacob Faibussowitsch     PetscCall(VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
729566063dSJacob Faibussowitsch     PetscCall(VecAssemblyBegin(mfqP->subb));
739566063dSJacob Faibussowitsch     PetscCall(VecAssemblyEnd(mfqP->subb));
74a7e14dcfSSatish Balay 
759566063dSJacob Faibussowitsch     PetscCall(VecSet(mfqP->subx, 0.0));
76a7e14dcfSSatish Balay 
779566063dSJacob Faibussowitsch     PetscCall(VecSet(mfqP->subndel, -1.0));
789566063dSJacob Faibussowitsch     PetscCall(VecSet(mfqP->subpdel, +1.0));
79a7e14dcfSSatish Balay 
80ce902467SBarry Smith     /* Complete the lower triangle of the Hessian matrix */
81a7e14dcfSSatish Balay     for (i = 0; i < mfqP->n; i++) {
82ad540459SPierre Jolivet       for (j = i + 1; j < mfqP->n; j++) mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i];
83a7e14dcfSSatish Balay     }
849566063dSJacob Faibussowitsch     PetscCall(MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES));
859566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY));
869566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY));
87a7e14dcfSSatish Balay 
889566063dSJacob Faibussowitsch     PetscCall(TaoResetStatistics(mfqP->subtao));
89fb842aefSJose E. Roman     /* PetscCall(TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_CURRENT)); */
90a7e14dcfSSatish Balay     /* enforce bound constraints -- experimental */
91a7e14dcfSSatish Balay     if (tao->XU && tao->XL) {
929566063dSJacob Faibussowitsch       PetscCall(VecCopy(tao->XU, mfqP->subxu));
939566063dSJacob Faibussowitsch       PetscCall(VecAXPY(mfqP->subxu, -1.0, tao->solution));
949566063dSJacob Faibussowitsch       PetscCall(VecScale(mfqP->subxu, 1.0 / mfqP->delta));
959566063dSJacob Faibussowitsch       PetscCall(VecCopy(tao->XL, mfqP->subxl));
969566063dSJacob Faibussowitsch       PetscCall(VecAXPY(mfqP->subxl, -1.0, tao->solution));
979566063dSJacob Faibussowitsch       PetscCall(VecScale(mfqP->subxl, 1.0 / mfqP->delta));
98a7e14dcfSSatish Balay 
999566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel));
1009566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel));
101a7e14dcfSSatish Balay     } else {
1029566063dSJacob Faibussowitsch       PetscCall(VecCopy(mfqP->subpdel, mfqP->subxu));
1039566063dSJacob Faibussowitsch       PetscCall(VecCopy(mfqP->subndel, mfqP->subxl));
104a7e14dcfSSatish Balay     }
105a7e14dcfSSatish Balay     /* Make sure xu > xl */
1069566063dSJacob Faibussowitsch     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
1079566063dSJacob Faibussowitsch     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
1089566063dSJacob Faibussowitsch     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
1093c859ba3SBarry Smith     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "upper bound < lower bound in subproblem");
110a7e14dcfSSatish Balay     /* Make sure xu > tao->solution > xl */
1119566063dSJacob Faibussowitsch     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
1129566063dSJacob Faibussowitsch     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx));
1139566063dSJacob Faibussowitsch     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
1143c859ba3SBarry Smith     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess < lower bound in subproblem");
115a7e14dcfSSatish Balay 
1169566063dSJacob Faibussowitsch     PetscCall(VecCopy(mfqP->subx, mfqP->subpdel));
1179566063dSJacob Faibussowitsch     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
1189566063dSJacob Faibussowitsch     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
1193c859ba3SBarry Smith     PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess > upper bound in subproblem");
120a7e14dcfSSatish Balay 
1219566063dSJacob Faibussowitsch     PetscCall(TaoSolve(mfqP->subtao));
1229566063dSJacob Faibussowitsch     PetscCall(TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL));
123a7e14dcfSSatish Balay 
124a7e14dcfSSatish Balay     /* test bounds post-solution*/
1259566063dSJacob Faibussowitsch     PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel));
1269566063dSJacob Faibussowitsch     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx));
1279566063dSJacob Faibussowitsch     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
128a7e14dcfSSatish Balay     if (maxval > 1e-5) {
1299566063dSJacob Faibussowitsch       PetscCall(PetscInfo(tao, "subproblem solution < lower bound\n"));
130a7e14dcfSSatish Balay       tao->reason = TAO_DIVERGED_TR_REDUCTION;
131a7e14dcfSSatish Balay     }
132a7e14dcfSSatish Balay 
1339566063dSJacob Faibussowitsch     PetscCall(VecCopy(mfqP->subx, mfqP->subpdel));
1349566063dSJacob Faibussowitsch     PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu));
1359566063dSJacob Faibussowitsch     PetscCall(VecMax(mfqP->subpdel, NULL, &maxval));
136a7e14dcfSSatish Balay     if (maxval > 1e-5) {
1379566063dSJacob Faibussowitsch       PetscCall(PetscInfo(tao, "subproblem solution > upper bound\n"));
138a7e14dcfSSatish Balay       tao->reason = TAO_DIVERGED_TR_REDUCTION;
139a7e14dcfSSatish Balay     }
140a7e14dcfSSatish Balay   } else {
1413ba16761SJacob Faibussowitsch     PetscCall(gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3));
142a7e14dcfSSatish Balay   }
143a7e14dcfSSatish Balay   *qmin *= -1;
1443ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
145a7e14dcfSSatish Balay }
146a7e14dcfSSatish Balay 
147d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_update_res(Tao tao)
148d71ae5a4SJacob Faibussowitsch {
149125ddc8aSJason Sarich   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
150125ddc8aSJason Sarich   PetscInt      i, row, col;
1516497c311SBarry Smith   PetscBLASInt  blasn, blasn2, blasm, ione = 1;
1526c95174dSJason Sarich   PetscReal     zero = 0.0, one = 1.0, wii, factor;
153125ddc8aSJason Sarich 
154691b26d3SBarry Smith   PetscFunctionBegin;
1556497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
1566497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->m, &blasm));
1576497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->n * mfqP->n, &blasn2));
158ad540459SPierre Jolivet   for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] = 0;
159ad540459SPierre Jolivet   for (i = 0; i < mfqP->n * mfqP->n; i++) mfqP->Hres[i] = 0;
160125ddc8aSJason Sarich 
161125ddc8aSJason Sarich   /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */
162737f463aSAlp Dener   if (tao->res_weights_v) {
1636c95174dSJason Sarich     /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */
164125ddc8aSJason Sarich     for (i = 0; i < mfqP->m; i++) {
1659566063dSJacob Faibussowitsch       PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &factor));
1666c95174dSJason Sarich       factor = factor * mfqP->C[i];
167792fecdfSBarry Smith       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione));
168125ddc8aSJason Sarich     }
169125ddc8aSJason Sarich 
170125ddc8aSJason Sarich     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
1716c95174dSJason Sarich     /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/
172125ddc8aSJason Sarich     for (i = 0; i < mfqP->m; i++) {
1739566063dSJacob Faibussowitsch       PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &wii));
174125ddc8aSJason Sarich       if (tao->niter > 1) {
175125ddc8aSJason Sarich         factor = wii * mfqP->C[i];
1766c95174dSJason Sarich         /* add wii * ci * Hi */
177792fecdfSBarry Smith         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
178125ddc8aSJason Sarich       }
1796c95174dSJason Sarich       /* add wii * gi * gi' */
180792fecdfSBarry Smith       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn));
181125ddc8aSJason Sarich     }
182737f463aSAlp Dener   } else if (tao->res_weights_w) {
183ce902467SBarry Smith     /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */
184737f463aSAlp Dener     for (i = 0; i < tao->res_weights_n; i++) {
185737f463aSAlp Dener       row = tao->res_weights_rows[i];
186737f463aSAlp Dener       col = tao->res_weights_cols[i];
187ce902467SBarry Smith 
188737f463aSAlp Dener       factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
189792fecdfSBarry Smith       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione));
190737f463aSAlp Dener       factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
191792fecdfSBarry Smith       PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione));
192ce902467SBarry Smith     }
193ce902467SBarry Smith 
194ce902467SBarry Smith     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
1956c95174dSJason Sarich     /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
196737f463aSAlp Dener     for (i = 0; i < tao->res_weights_n; i++) {
197737f463aSAlp Dener       row    = tao->res_weights_rows[i];
198737f463aSAlp Dener       col    = tao->res_weights_cols[i];
199737f463aSAlp Dener       factor = tao->res_weights_w[i] / 2.0;
200125ddc8aSJason Sarich       /* add wij * gi gj' + wij * gj gi' */
201792fecdfSBarry Smith       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn));
202792fecdfSBarry Smith       PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn));
203125ddc8aSJason Sarich     }
204125ddc8aSJason Sarich     if (tao->niter > 1) {
205737f463aSAlp Dener       for (i = 0; i < tao->res_weights_n; i++) {
206737f463aSAlp Dener         row = tao->res_weights_rows[i];
207737f463aSAlp Dener         col = tao->res_weights_cols[i];
208125ddc8aSJason Sarich 
209125ddc8aSJason Sarich         /* add  wij*cj*Hi */
210737f463aSAlp Dener         factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0;
211792fecdfSBarry Smith         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione));
212125ddc8aSJason Sarich 
213125ddc8aSJason Sarich         /* add wij*ci*Hj */
214737f463aSAlp Dener         factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0;
215792fecdfSBarry Smith         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione));
216125ddc8aSJason Sarich       }
217125ddc8aSJason Sarich     }
218125ddc8aSJason Sarich   } else {
219ce902467SBarry Smith     /* Default: Gres= sum_i[cigi] = G*c' */
2209566063dSJacob Faibussowitsch     PetscCall(PetscInfo(tao, "Identity weights\n"));
221792fecdfSBarry Smith     PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione));
222ce902467SBarry Smith 
223ce902467SBarry Smith     /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */
2246c95174dSJason Sarich     /*  Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)}  */
225792fecdfSBarry Smith     PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn));
226125ddc8aSJason Sarich 
227125ddc8aSJason Sarich     /* sum(F(xkin,i)*H(:,:,i)) */
228125ddc8aSJason Sarich     if (tao->niter > 1) {
229125ddc8aSJason Sarich       for (i = 0; i < mfqP->m; i++) {
2306c95174dSJason Sarich         factor = mfqP->C[i];
231792fecdfSBarry Smith         PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione));
232125ddc8aSJason Sarich       }
233125ddc8aSJason Sarich     }
234125ddc8aSJason Sarich   }
2353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
236125ddc8aSJason Sarich }
237ca88a2e0STodd Munson 
238d71ae5a4SJacob Faibussowitsch static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi)
239d71ae5a4SJacob Faibussowitsch {
240a7e14dcfSSatish Balay   /* Phi = .5*[x(1)^2  sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */
241a7e14dcfSSatish Balay   PetscInt  i, j, k;
242a7e14dcfSSatish Balay   PetscReal sqrt2 = PetscSqrtReal(2.0);
243f06e3bfaSBarry Smith 
244a7e14dcfSSatish Balay   PetscFunctionBegin;
245a7e14dcfSSatish Balay   j = 0;
246a7e14dcfSSatish Balay   for (i = 0; i < n; i++) {
247a7e14dcfSSatish Balay     phi[j] = 0.5 * x[i] * x[i];
248a7e14dcfSSatish Balay     j++;
249a7e14dcfSSatish Balay     for (k = i + 1; k < n; k++) {
250a7e14dcfSSatish Balay       phi[j] = x[i] * x[k] / sqrt2;
251a7e14dcfSSatish Balay       j++;
252a7e14dcfSSatish Balay     }
253a7e14dcfSSatish Balay   }
2543ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
255a7e14dcfSSatish Balay }
256a7e14dcfSSatish Balay 
257d71ae5a4SJacob Faibussowitsch static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP)
258d71ae5a4SJacob Faibussowitsch {
259a7e14dcfSSatish Balay   /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x'
260a7e14dcfSSatish Balay    that satisfies the interpolation conditions Q(X[:,j]) = f(j)
261a7e14dcfSSatish Balay    for j=1,...,m and with a Hessian matrix of least Frobenius norm */
262a7e14dcfSSatish Balay 
263a7e14dcfSSatish Balay   /* NB --we are ignoring c */
264a7e14dcfSSatish Balay   PetscInt     i, j, k, num, np = mfqP->nmodelpoints;
265a7e14dcfSSatish Balay   PetscReal    one = 1.0, zero = 0.0, negone = -1.0;
2666497c311SBarry Smith   PetscBLASInt blasnpmax, blasnplus1, blasnp, blasint, blasint2;
267f06e3bfaSBarry Smith   PetscBLASInt info, ione = 1;
268a7e14dcfSSatish Balay   PetscReal    sqrt2 = PetscSqrtReal(2.0);
269a7e14dcfSSatish Balay 
270a7e14dcfSSatish Balay   PetscFunctionBegin;
2716497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
2726497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->n + 1, &blasnplus1));
2736497c311SBarry Smith   PetscCall(PetscBLASIntCast(np, &blasnp));
2746497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->n * (mfqP->n + 1) / 2, &blasint));
2756497c311SBarry Smith   PetscCall(PetscBLASIntCast(np - mfqP->n - 1, &blasint2));
276ad540459SPierre Jolivet   for (i = 0; i < mfqP->n * mfqP->m; i++) mfqP->Gdel[i] = 0;
277ad540459SPierre Jolivet   for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) mfqP->Hdel[i] = 0;
278a7e14dcfSSatish Balay 
279a7e14dcfSSatish Balay   /* factor M */
280792fecdfSBarry Smith   PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info));
28163a3b9bcSJacob Faibussowitsch   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrf returned with value %" PetscBLASInt_FMT, info);
282a7e14dcfSSatish Balay 
283a7e14dcfSSatish Balay   if (np == mfqP->n + 1) {
284ad540459SPierre Jolivet     for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) mfqP->omega[i] = 0.0;
285ad540459SPierre Jolivet     for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->beta[i] = 0.0;
286a7e14dcfSSatish Balay   } else {
287a7e14dcfSSatish Balay     /* Let Ltmp = (L'*L) */
288792fecdfSBarry Smith     PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint));
289a7e14dcfSSatish Balay 
290a7e14dcfSSatish Balay     /* factor Ltmp */
291792fecdfSBarry Smith     PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info));
29263a3b9bcSJacob Faibussowitsch     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrf returned with value %" PetscBLASInt_FMT, info);
293a7e14dcfSSatish Balay   }
294a7e14dcfSSatish Balay 
295a7e14dcfSSatish Balay   for (k = 0; k < mfqP->m; k++) {
296a7e14dcfSSatish Balay     if (np != mfqP->n + 1) {
297a7e14dcfSSatish Balay       /* Solve L'*L*Omega = Z' * RESk*/
298792fecdfSBarry Smith       PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione));
299792fecdfSBarry Smith       PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info));
30063a3b9bcSJacob Faibussowitsch       PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrs returned with value %" PetscBLASInt_FMT, info);
301a7e14dcfSSatish Balay 
302a7e14dcfSSatish Balay       /* Beta = L*Omega */
303792fecdfSBarry Smith       PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione));
304a7e14dcfSSatish Balay     }
305a7e14dcfSSatish Balay 
306a7e14dcfSSatish Balay     /* solve M'*Alpha = RESk - N'*Beta */
307792fecdfSBarry Smith     PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione));
308792fecdfSBarry Smith     PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info));
30963a3b9bcSJacob Faibussowitsch     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrs returned with value %" PetscBLASInt_FMT, info);
310a7e14dcfSSatish Balay 
311a7e14dcfSSatish Balay     /* Gdel(:,k) = Alpha(2:n+1) */
312ad540459SPierre Jolivet     for (i = 0; i < mfqP->n; i++) mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1];
313a7e14dcfSSatish Balay 
314a7e14dcfSSatish Balay     /* Set Hdels */
315a7e14dcfSSatish Balay     num = 0;
316a7e14dcfSSatish Balay     for (i = 0; i < mfqP->n; i++) {
317a7e14dcfSSatish Balay       /* H[i,i,k] = Beta(num) */
318a7e14dcfSSatish Balay       mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num];
319a7e14dcfSSatish Balay       num++;
320a7e14dcfSSatish Balay       for (j = i + 1; j < mfqP->n; j++) {
321a7e14dcfSSatish Balay         /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */
322a7e14dcfSSatish Balay         mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
323a7e14dcfSSatish Balay         mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2;
324a7e14dcfSSatish Balay         num++;
325a7e14dcfSSatish Balay       }
326a7e14dcfSSatish Balay     }
327a7e14dcfSSatish Balay   }
3283ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
329a7e14dcfSSatish Balay }
330a7e14dcfSSatish Balay 
331d71ae5a4SJacob Faibussowitsch static PetscErrorCode morepoints(TAO_POUNDERS *mfqP)
332d71ae5a4SJacob Faibussowitsch {
333a7e14dcfSSatish Balay   /* Assumes mfqP->model_indices[0]  is minimum index
334a7e14dcfSSatish Balay    Finishes adding points to mfqP->model_indices (up to npmax)
335a7e14dcfSSatish Balay    Computes L,Z,M,N
336a7e14dcfSSatish Balay    np is actual number of points in model (should equal npmax?) */
337a7e14dcfSSatish Balay   PetscInt         point, i, j, offset;
338a7e14dcfSSatish Balay   PetscInt         reject;
3396497c311SBarry Smith   PetscBLASInt     blasn, blasnpmax, blasnplus1, info, blasnmax, blasint, blasint2, blasnp, blasmaxmn;
3405e081366SBarry Smith   const PetscReal *x;
3415e081366SBarry Smith   PetscReal        normd;
342a7e14dcfSSatish Balay 
343f06e3bfaSBarry Smith   PetscFunctionBegin;
3446497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
3456497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
3466497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->nmax, &blasnmax));
3476497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->n + 1, &blasnplus1));
3486497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->n, &blasnp));
349a7e14dcfSSatish Balay   /* Initialize M,N */
350a7e14dcfSSatish Balay   for (i = 0; i < mfqP->n + 1; i++) {
3519566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x));
352a7e14dcfSSatish Balay     mfqP->M[(mfqP->n + 1) * i] = 1.0;
353ad540459SPierre Jolivet     for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
3549566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x));
3559566063dSJacob Faibussowitsch     PetscCall(phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i]));
356a7e14dcfSSatish Balay   }
357a7e14dcfSSatish Balay 
358a7e14dcfSSatish Balay   /* Now we add points until we have npmax starting with the most recent ones */
359a7e14dcfSSatish Balay   point              = mfqP->nHist - 1;
360a7e14dcfSSatish Balay   mfqP->nmodelpoints = mfqP->n + 1;
361a7e14dcfSSatish Balay   while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) {
362a7e14dcfSSatish Balay     /* Reject any points already in the model */
363a7e14dcfSSatish Balay     reject = 0;
364a7e14dcfSSatish Balay     for (j = 0; j < mfqP->n + 1; j++) {
365a7e14dcfSSatish Balay       if (point == mfqP->model_indices[j]) {
366a7e14dcfSSatish Balay         reject = 1;
367a7e14dcfSSatish Balay         break;
368a7e14dcfSSatish Balay       }
369a7e14dcfSSatish Balay     }
370a7e14dcfSSatish Balay 
371a7e14dcfSSatish Balay     /* Reject if norm(d) >c2 */
372a7e14dcfSSatish Balay     if (!reject) {
3739566063dSJacob Faibussowitsch       PetscCall(VecCopy(mfqP->Xhist[point], mfqP->workxvec));
3749566063dSJacob Faibussowitsch       PetscCall(VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex]));
3759566063dSJacob Faibussowitsch       PetscCall(VecNorm(mfqP->workxvec, NORM_2, &normd));
376a7e14dcfSSatish Balay       normd /= mfqP->delta;
377ad540459SPierre Jolivet       if (normd > mfqP->c2) reject = 1;
378a7e14dcfSSatish Balay     }
379f06e3bfaSBarry Smith     if (reject) {
380a7e14dcfSSatish Balay       point--;
381a7e14dcfSSatish Balay       continue;
382a7e14dcfSSatish Balay     }
383a7e14dcfSSatish Balay 
3849566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(mfqP->Xhist[point], &x));
385a7e14dcfSSatish Balay     mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0;
386ad540459SPierre Jolivet     for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
3879566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(mfqP->Xhist[point], &x));
3889566063dSJacob Faibussowitsch     PetscCall(phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)]));
389a7e14dcfSSatish Balay 
390a7e14dcfSSatish Balay     /* Update QR factorization */
391a7e14dcfSSatish Balay     /* Copy M' to Q_tmp */
392a7e14dcfSSatish Balay     for (i = 0; i < mfqP->n + 1; i++) {
393ad540459SPierre Jolivet       for (j = 0; j < mfqP->npmax; j++) mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j];
394a7e14dcfSSatish Balay     }
3956497c311SBarry Smith     PetscCall(PetscBLASIntCast(mfqP->nmodelpoints + 1, &blasnp));
396a7e14dcfSSatish Balay     /* Q_tmp,R = qr(M') */
3976497c311SBarry Smith     PetscCall(PetscBLASIntCast(PetscMax(mfqP->m, mfqP->n + 1), &blasmaxmn));
398792fecdfSBarry Smith     PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info));
39963a3b9bcSJacob Faibussowitsch     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine geqrf returned with value %" PetscBLASInt_FMT, info);
400a7e14dcfSSatish Balay 
401a7e14dcfSSatish Balay     /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */
402a7e14dcfSSatish Balay     /* L = N*Qtmp */
4036497c311SBarry Smith     PetscCall(PetscBLASIntCast(mfqP->n * (mfqP->n + 1) / 2, &blasint2));
404a7e14dcfSSatish Balay     /* Copy N to L_tmp */
405ad540459SPierre Jolivet     for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) mfqP->L_tmp[i] = mfqP->N[i];
406a7e14dcfSSatish Balay     /* Copy L_save to L_tmp */
407a7e14dcfSSatish Balay 
408a7e14dcfSSatish Balay     /* L_tmp = N*Qtmp' */
409792fecdfSBarry Smith     PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info));
41063a3b9bcSJacob Faibussowitsch     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info);
411a7e14dcfSSatish Balay 
412a7e14dcfSSatish Balay     /* Copy L_tmp to L_save */
413ad540459SPierre Jolivet     for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L_save[i] = mfqP->L_tmp[i];
414a7e14dcfSSatish Balay 
415a7e14dcfSSatish Balay     /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */
4166497c311SBarry Smith     PetscCall(PetscBLASIntCast(mfqP->nmodelpoints - mfqP->n, &blasint));
4179566063dSJacob Faibussowitsch     PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
418792fecdfSBarry Smith     PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info));
4199566063dSJacob Faibussowitsch     PetscCall(PetscFPTrapPop());
42063a3b9bcSJacob Faibussowitsch     PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine gesvd returned with value %" PetscBLASInt_FMT, info);
421a7e14dcfSSatish Balay 
422a7e14dcfSSatish Balay     if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) {
423a7e14dcfSSatish Balay       /* accept point */
424a7e14dcfSSatish Balay       mfqP->model_indices[mfqP->nmodelpoints] = point;
425a7e14dcfSSatish Balay       /* Copy Q_tmp to Q */
426ad540459SPierre Jolivet       for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q[i] = mfqP->Q_tmp[i];
427ad540459SPierre Jolivet       for (i = 0; i < mfqP->npmax; i++) mfqP->tau[i] = mfqP->tau_tmp[i];
428a7e14dcfSSatish Balay       mfqP->nmodelpoints++;
4296497c311SBarry Smith       PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blasnp));
430a7e14dcfSSatish Balay 
431a7e14dcfSSatish Balay       /* Copy L_save to L */
432ad540459SPierre Jolivet       for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = mfqP->L_save[i];
433a7e14dcfSSatish Balay     }
434a7e14dcfSSatish Balay     point--;
435a7e14dcfSSatish Balay   }
436a7e14dcfSSatish Balay 
4376497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blasnp));
438a7e14dcfSSatish Balay   /* Copy Q(:,n+2:np) to Z */
439a7e14dcfSSatish Balay   /* First set Q_tmp to I */
440ad540459SPierre Jolivet   for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q_tmp[i] = 0.0;
441ad540459SPierre Jolivet   for (i = 0; i < mfqP->npmax; i++) mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0;
442a7e14dcfSSatish Balay 
443a7e14dcfSSatish Balay   /* Q_tmp = I * Q */
444792fecdfSBarry Smith   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));
44563a3b9bcSJacob Faibussowitsch   PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info);
446a7e14dcfSSatish Balay 
447a7e14dcfSSatish Balay   /* Copy Q_tmp(:,n+2:np) to Z) */
448a7e14dcfSSatish Balay   offset = mfqP->npmax * (mfqP->n + 1);
449ad540459SPierre Jolivet   for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Z[i - offset] = mfqP->Q_tmp[i];
450a7e14dcfSSatish Balay 
451a7e14dcfSSatish Balay   if (mfqP->nmodelpoints == mfqP->n + 1) {
452a7e14dcfSSatish Balay     /* Set L to I_{n+1} */
453ad540459SPierre Jolivet     for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = 0.0;
454ad540459SPierre Jolivet     for (i = 0; i < mfqP->n; i++) mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0;
455a7e14dcfSSatish Balay   }
4563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
457a7e14dcfSSatish Balay }
458a7e14dcfSSatish Balay 
459a7e14dcfSSatish Balay /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */
460d71ae5a4SJacob Faibussowitsch static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index)
461d71ae5a4SJacob Faibussowitsch {
462a7e14dcfSSatish Balay   PetscFunctionBegin;
463a7e14dcfSSatish Balay   /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/
4649566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist]));
4659566063dSJacob Faibussowitsch   PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES));
4669566063dSJacob Faibussowitsch   PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]));
4679566063dSJacob Faibussowitsch   PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]));
4689566063dSJacob Faibussowitsch   PetscCall(VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex]));
469a7e14dcfSSatish Balay 
470a7e14dcfSSatish Balay   /* Project into feasible region */
47148a46eb9SPierre Jolivet   if (tao->XU && tao->XL) PetscCall(VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist]));
472a7e14dcfSSatish Balay 
473a7e14dcfSSatish Balay   /* Compute value of new vector */
4749566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist]));
475a7e14dcfSSatish Balay   CHKMEMQ;
4769566063dSJacob Faibussowitsch   PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]));
477a7e14dcfSSatish Balay 
478a7e14dcfSSatish Balay   /* Add new vector to model */
479a7e14dcfSSatish Balay   mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist;
480a7e14dcfSSatish Balay   mfqP->nmodelpoints++;
481a7e14dcfSSatish Balay   mfqP->nHist++;
4823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
483a7e14dcfSSatish Balay }
484a7e14dcfSSatish Balay 
485d71ae5a4SJacob Faibussowitsch static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints)
486d71ae5a4SJacob Faibussowitsch {
487a7e14dcfSSatish Balay   /* modeld = Q(:,np+1:n)' */
488a7e14dcfSSatish Balay   PetscInt     i, j, minindex = 0;
489e270355aSBarry Smith   PetscReal    dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY;
4906497c311SBarry Smith   PetscBLASInt blasn, blasnpmax, blask, info;
4916497c311SBarry Smith   PetscBLASInt blas1 = 1, blasnmax;
492f06e3bfaSBarry Smith 
493362febeeSStefano Zampini   PetscFunctionBegin;
4946497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
4956497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
4966497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask));
4976497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->nmax, &blasnmax));
4986497c311SBarry Smith 
499a7e14dcfSSatish Balay   /* Qtmp = I(n x n) */
500a7e14dcfSSatish Balay   for (i = 0; i < mfqP->n; i++) {
501ad540459SPierre Jolivet     for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0;
502a7e14dcfSSatish Balay   }
503ad540459SPierre Jolivet   for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0;
504a7e14dcfSSatish Balay 
505a7e14dcfSSatish Balay   /* Qtmp = Q * I */
506792fecdfSBarry Smith   PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info));
507a7e14dcfSSatish Balay 
508a7e14dcfSSatish Balay   for (i = mfqP->nmodelpoints; i < mfqP->n; i++) {
509792fecdfSBarry Smith     PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1));
510a7e14dcfSSatish Balay     if (dp > 0.0) { /* Model says use the other direction! */
511ad540459SPierre Jolivet       for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i * mfqP->npmax + j] *= -1;
512a7e14dcfSSatish Balay     }
513a7e14dcfSSatish Balay     /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */
514ad540459SPierre Jolivet     for (j = 0; j < mfqP->n; j++) mfqP->work2[j] = mfqP->Gres[j];
515792fecdfSBarry Smith     PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1));
516792fecdfSBarry Smith     PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1));
517a7e14dcfSSatish Balay     if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) {
518a7e14dcfSSatish Balay       minindex = i;
519a7e14dcfSSatish Balay       minvalue = mfqP->work[i];
520a7e14dcfSSatish Balay     }
52148a46eb9SPierre Jolivet     if (addallpoints != 0) PetscCall(addpoint(tao, mfqP, i));
522a7e14dcfSSatish Balay   }
52348a46eb9SPierre Jolivet   if (!addallpoints) PetscCall(addpoint(tao, mfqP, minindex));
5243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
525a7e14dcfSSatish Balay }
526a7e14dcfSSatish Balay 
527d71ae5a4SJacob Faibussowitsch static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c)
528d71ae5a4SJacob Faibussowitsch {
529a7e14dcfSSatish Balay   PetscInt         i, j;
5306497c311SBarry Smith   PetscBLASInt     blasm, blasj, blask, blasn, ione = 1, info;
5316497c311SBarry Smith   PetscBLASInt     blasnpmax, blasmaxmn;
532a7e14dcfSSatish Balay   PetscReal        proj, normd;
5335e081366SBarry Smith   const PetscReal *x;
534a7e14dcfSSatish Balay 
535f06e3bfaSBarry Smith   PetscFunctionBegin;
5366497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
5376497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->m, &blasm));
5386497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
539a7e14dcfSSatish Balay   for (i = mfqP->nHist - 1; i >= 0; i--) {
5409566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(mfqP->Xhist[i], &x));
541ad540459SPierre Jolivet     for (j = 0; j < mfqP->n; j++) mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta;
5429566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(mfqP->Xhist[i], &x));
543792fecdfSBarry Smith     PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione));
544792fecdfSBarry Smith     PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione));
545a8bd9cb3STodd Munson     if (normd <= c) {
54657508eceSPierre Jolivet       PetscCall(PetscBLASIntCast(PetscMax(mfqP->n - mfqP->nmodelpoints, 0), &blasj));
547a7e14dcfSSatish Balay       if (!mfqP->q_is_I) {
548a7e14dcfSSatish Balay         /* project D onto null */
5496497c311SBarry Smith         PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask));
550792fecdfSBarry Smith         PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info));
55163a3b9bcSJacob Faibussowitsch         PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "ormqr returned value %" PetscBLASInt_FMT, info);
552a7e14dcfSSatish Balay       }
553792fecdfSBarry Smith       PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione));
554a7e14dcfSSatish Balay 
555a7e14dcfSSatish Balay       if (proj >= mfqP->theta1) { /* add this index to model */
556a7e14dcfSSatish Balay         mfqP->model_indices[mfqP->nmodelpoints] = i;
557a7e14dcfSSatish Balay         mfqP->nmodelpoints++;
558792fecdfSBarry Smith         PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione));
5596497c311SBarry Smith         PetscCall(PetscBLASIntCast(mfqP->npmax * (mfqP->nmodelpoints), &blask));
560792fecdfSBarry Smith         PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione));
5616497c311SBarry Smith         PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask));
5626497c311SBarry Smith         PetscCall(PetscBLASIntCast(PetscMax(mfqP->m, mfqP->n), &blasmaxmn));
563792fecdfSBarry Smith         PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info));
56463a3b9bcSJacob Faibussowitsch         PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "geqrf returned value %" PetscBLASInt_FMT, info);
565a7e14dcfSSatish Balay         mfqP->q_is_I = 0;
566a7e14dcfSSatish Balay       }
567ad540459SPierre Jolivet       if (mfqP->nmodelpoints == mfqP->n) break;
568a7e14dcfSSatish Balay     }
569a7e14dcfSSatish Balay   }
5703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
571a7e14dcfSSatish Balay }
572f06e3bfaSBarry Smith 
573d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoSolve_POUNDERS(Tao tao)
574d71ae5a4SJacob Faibussowitsch {
575a7e14dcfSSatish Balay   TAO_POUNDERS    *mfqP = (TAO_POUNDERS *)tao->data;
5768931d482SJason Sarich   PetscInt         i, ii, j, k, l;
577a7e14dcfSSatish Balay   PetscReal        step = 1.0;
578a7e14dcfSSatish Balay   PetscInt         low, high;
579a7e14dcfSSatish Balay   PetscReal        minnorm;
5805e081366SBarry Smith   PetscReal       *x, *f;
5815e081366SBarry Smith   const PetscReal *xmint, *fmin;
582bd026e97SJed Brown   PetscReal        deltaold;
583a7e14dcfSSatish Balay   PetscReal        gnorm;
584a7e14dcfSSatish Balay   PetscBLASInt     info, ione = 1, iblas;
585ff2b74efSJason Sarich   PetscBool        valid, same;
586a7e14dcfSSatish Balay   PetscReal        mdec, rho, normxsp;
587a7e14dcfSSatish Balay   PetscReal        one = 1.0, zero = 0.0, ratio;
588125ddc8aSJason Sarich   PetscBLASInt     blasm, blasn, blasncopy, blasnpmax;
589a7bbbda8SBarry Smith   static PetscBool set = PETSC_FALSE;
590a7e14dcfSSatish Balay 
591a7e14dcfSSatish Balay   /* n = # of parameters
592a7e14dcfSSatish Balay      m = dimension (components) of function  */
593a7e14dcfSSatish Balay   PetscFunctionBegin;
5949566063dSJacob Faibussowitsch   PetscCall(PetscCitationsRegister("@article{UNEDF0,\n"
595a7bbbda8SBarry Smith                                    "title = {Nuclear energy density optimization},\n"
596a7bbbda8SBarry Smith                                    "author = {Kortelainen, M.  and Lesinski, T.  and Mor\'e, J.  and Nazarewicz, W.\n"
597a7bbbda8SBarry Smith                                    "          and Sarich, J.  and Schunck, N.  and Stoitsov, M. V. and Wild, S. },\n"
598a7bbbda8SBarry Smith                                    "journal = {Phys. Rev. C},\n"
599a7bbbda8SBarry Smith                                    "volume = {82},\n"
600a7bbbda8SBarry Smith                                    "number = {2},\n"
601a7bbbda8SBarry Smith                                    "pages = {024313},\n"
602a7bbbda8SBarry Smith                                    "numpages = {18},\n"
603a7bbbda8SBarry Smith                                    "year = {2010},\n"
604a7bbbda8SBarry Smith                                    "month = {Aug},\n"
6059371c9d4SSatish Balay                                    "doi = {10.1103/PhysRevC.82.024313}\n}\n",
6069371c9d4SSatish Balay                                    &set));
607e6d4cb7fSJason Sarich   tao->niter = 0;
608a7e14dcfSSatish Balay   if (tao->XL && tao->XU) {
609a7e14dcfSSatish Balay     /* Check x0 <= XU */
610a7e14dcfSSatish Balay     PetscReal val;
611ce902467SBarry Smith 
6129566063dSJacob Faibussowitsch     PetscCall(VecCopy(tao->solution, mfqP->Xhist[0]));
6139566063dSJacob Faibussowitsch     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU));
6149566063dSJacob Faibussowitsch     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
6153c859ba3SBarry Smith     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 > upper bound");
616a7e14dcfSSatish Balay 
617a7e14dcfSSatish Balay     /* Check x0 >= xl */
6189566063dSJacob Faibussowitsch     PetscCall(VecCopy(tao->XL, mfqP->Xhist[0]));
6199566063dSJacob Faibussowitsch     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->solution));
6209566063dSJacob Faibussowitsch     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
6213c859ba3SBarry Smith     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 < lower bound");
622f06e3bfaSBarry Smith 
623a7e14dcfSSatish Balay     /* Check x0 + delta < XU  -- should be able to get around this eventually */
624a7e14dcfSSatish Balay 
6259566063dSJacob Faibussowitsch     PetscCall(VecSet(mfqP->Xhist[0], mfqP->delta));
6269566063dSJacob Faibussowitsch     PetscCall(VecAXPY(mfqP->Xhist[0], 1.0, tao->solution));
6279566063dSJacob Faibussowitsch     PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU));
6289566063dSJacob Faibussowitsch     PetscCall(VecMax(mfqP->Xhist[0], NULL, &val));
6293c859ba3SBarry Smith     PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 + delta > upper bound");
630a7e14dcfSSatish Balay   }
631a7e14dcfSSatish Balay 
6326497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->m, &blasm));
6336497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->n, &blasn));
6346497c311SBarry Smith   PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax));
635ce902467SBarry Smith   for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0;
636a7e14dcfSSatish Balay 
6379566063dSJacob Faibussowitsch   PetscCall(VecCopy(tao->solution, mfqP->Xhist[0]));
638ce902467SBarry Smith 
639ce902467SBarry Smith   /* This provides enough information to approximate the gradient of the objective */
640ce902467SBarry Smith   /* using a forward difference scheme. */
641ce902467SBarry Smith 
6429566063dSJacob Faibussowitsch   PetscCall(PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta));
6439566063dSJacob Faibussowitsch   PetscCall(pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0]));
644a7e14dcfSSatish Balay   mfqP->minindex = 0;
645a7e14dcfSSatish Balay   minnorm        = mfqP->Fres[0];
646ff2b74efSJason Sarich 
6479566063dSJacob Faibussowitsch   PetscCall(VecGetOwnershipRange(mfqP->Xhist[0], &low, &high));
648805332fbSTodd Munson   for (i = 1; i < mfqP->n + 1; ++i) {
6499566063dSJacob Faibussowitsch     PetscCall(VecCopy(mfqP->Xhist[0], mfqP->Xhist[i]));
650ce902467SBarry Smith 
651a7e14dcfSSatish Balay     if (i - 1 >= low && i - 1 < high) {
6529566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mfqP->Xhist[i], &x));
653a7e14dcfSSatish Balay       x[i - 1 - low] += mfqP->delta;
6549566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mfqP->Xhist[i], &x));
655a7e14dcfSSatish Balay     }
656a7e14dcfSSatish Balay     CHKMEMQ;
6579566063dSJacob Faibussowitsch     PetscCall(pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i]));
658a7e14dcfSSatish Balay     if (mfqP->Fres[i] < minnorm) {
659a7e14dcfSSatish Balay       mfqP->minindex = i;
660a7e14dcfSSatish Balay       minnorm        = mfqP->Fres[i];
661a7e14dcfSSatish Balay     }
662a7e14dcfSSatish Balay   }
6639566063dSJacob Faibussowitsch   PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
6649566063dSJacob Faibussowitsch   PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res));
6659566063dSJacob Faibussowitsch   PetscCall(PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm));
666ce902467SBarry Smith 
667a7e14dcfSSatish Balay   /* Gather mpi vecs to one big local vec */
668a7e14dcfSSatish Balay 
669a7e14dcfSSatish Balay   /* Begin serial code */
670a7e14dcfSSatish Balay 
671a7e14dcfSSatish Balay   /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
672a7e14dcfSSatish Balay   /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
673a7e14dcfSSatish Balay   /* (Column oriented for blas calls) */
674a7e14dcfSSatish Balay   ii = 0;
675a7e14dcfSSatish Balay 
676835f2295SStefano Zampini   PetscCall(PetscInfo(tao, "Build matrix: %d\n", mfqP->size));
677805332fbSTodd Munson   if (1 == mfqP->size) {
6789566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
679a7e14dcfSSatish Balay     for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
6809566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
6819566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
682a7e14dcfSSatish Balay     for (i = 0; i < mfqP->n + 1; i++) {
683a7e14dcfSSatish Balay       if (i == mfqP->minindex) continue;
684a7e14dcfSSatish Balay 
6859566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mfqP->Xhist[i], &x));
686ad540459SPierre Jolivet       for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
6879566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mfqP->Xhist[i], &x));
688a7e14dcfSSatish Balay 
6899566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mfqP->Fhist[i], &f));
690ad540459SPierre Jolivet       for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
6919566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mfqP->Fhist[i], &f));
69294e8dccaSTodd Munson 
693a7e14dcfSSatish Balay       mfqP->model_indices[ii++] = i;
694a7e14dcfSSatish Balay     }
695ad540459SPierre Jolivet     for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
6969566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
697a7e14dcfSSatish Balay   } else {
6989566063dSJacob Faibussowitsch     PetscCall(VecSet(mfqP->localxmin, 0));
6999566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD));
7009566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD));
701ce902467SBarry Smith 
7029566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(mfqP->localxmin, &xmint));
703a7e14dcfSSatish Balay     for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
7049566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(mfqP->localxmin, &xmint));
705a7e14dcfSSatish Balay 
7069566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD));
7079566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD));
7089566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(mfqP->localfmin, &fmin));
709a7e14dcfSSatish Balay     for (i = 0; i < mfqP->n + 1; i++) {
710a7e14dcfSSatish Balay       if (i == mfqP->minindex) continue;
711a7e14dcfSSatish Balay 
7129566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD));
7139566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD));
7149566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mfqP->localx, &x));
715ad540459SPierre Jolivet       for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta;
7169566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mfqP->localx, &x));
717a7e14dcfSSatish Balay 
7189566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD));
7199566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD));
7209566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mfqP->localf, &f));
721ad540459SPierre Jolivet       for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j];
7229566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mfqP->localf, &f));
72394e8dccaSTodd Munson 
72494e8dccaSTodd Munson       mfqP->model_indices[ii++] = i;
725a7e14dcfSSatish Balay     }
726ad540459SPierre Jolivet     for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j];
7279566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(mfqP->localfmin, &fmin));
728a7e14dcfSSatish Balay   }
729a7e14dcfSSatish Balay 
730a7e14dcfSSatish Balay   /* Determine the initial quadratic models */
731a7e14dcfSSatish Balay   /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */
732a7e14dcfSSatish Balay   /* D (nxn) Fdiff (nxm)  => G (nxm) */
733125ddc8aSJason Sarich   blasncopy = blasn;
734792fecdfSBarry Smith   PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info));
735835f2295SStefano Zampini   PetscCall(PetscInfo(tao, "Linear solve return: %" PetscBLASInt_FMT "\n", info));
736a7e14dcfSSatish Balay 
7379566063dSJacob Faibussowitsch   PetscCall(pounders_update_res(tao));
738a7e14dcfSSatish Balay 
739a7e14dcfSSatish Balay   valid = PETSC_TRUE;
740a7e14dcfSSatish Balay 
7419566063dSJacob Faibussowitsch   PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
7429566063dSJacob Faibussowitsch   PetscCall(VecAssemblyBegin(tao->gradient));
7439566063dSJacob Faibussowitsch   PetscCall(VecAssemblyEnd(tao->gradient));
7449566063dSJacob Faibussowitsch   PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
745a7e14dcfSSatish Balay   gnorm *= mfqP->delta;
7469566063dSJacob Faibussowitsch   PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
747763847b4SAlp Dener 
748763847b4SAlp Dener   tao->reason = TAO_CONTINUE_ITERATING;
7499566063dSJacob Faibussowitsch   PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its));
7509566063dSJacob Faibussowitsch   PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step));
751dbbe0bcdSBarry Smith   PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
752763847b4SAlp Dener 
753a7e14dcfSSatish Balay   mfqP->nHist        = mfqP->n + 1;
754a7e14dcfSSatish Balay   mfqP->nmodelpoints = mfqP->n + 1;
7559566063dSJacob Faibussowitsch   PetscCall(PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm));
756a7e14dcfSSatish Balay 
757763847b4SAlp Dener   while (tao->reason == TAO_CONTINUE_ITERATING) {
758a7e14dcfSSatish Balay     PetscReal gnm = 1e-4;
759e1e80dc8SAlp Dener     /* Call general purpose update function */
760dbbe0bcdSBarry Smith     PetscTryTypeMethod(tao, update, tao->niter, tao->user_update);
7618931d482SJason Sarich     tao->niter++;
762ce902467SBarry Smith     /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */
7639566063dSJacob Faibussowitsch     PetscCall(gqtwrap(tao, &gnm, &mdec));
764a7e14dcfSSatish Balay     /* Evaluate the function at the new point */
765a7e14dcfSSatish Balay 
766ad540459SPierre Jolivet     for (i = 0; i < mfqP->n; i++) mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i];
7679566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist]));
7689566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist]));
7699566063dSJacob Faibussowitsch     PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES));
7709566063dSJacob Faibussowitsch     PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]));
7719566063dSJacob Faibussowitsch     PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]));
772a7e14dcfSSatish Balay 
7739566063dSJacob Faibussowitsch     PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist]));
774125ddc8aSJason Sarich 
775a7e14dcfSSatish Balay     rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec;
776a7e14dcfSSatish Balay     mfqP->nHist++;
777a7e14dcfSSatish Balay 
778a7e14dcfSSatish Balay     /* Update the center */
779a7e14dcfSSatish Balay     if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) {
780a7e14dcfSSatish Balay       /* Update model to reflect new base point */
781ad540459SPierre Jolivet       for (i = 0; i < mfqP->n; i++) mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta;
782a7e14dcfSSatish Balay       for (j = 0; j < mfqP->m; j++) {
783a7e14dcfSSatish Balay         /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work';
784a7e14dcfSSatish Balay          G(:,j) = G(:,j) + H(:,:,j)*work' */
785a7e14dcfSSatish Balay         for (k = 0; k < mfqP->n; k++) {
786a7e14dcfSSatish Balay           mfqP->work2[k] = 0.0;
787ad540459SPierre Jolivet           for (l = 0; l < mfqP->n; l++) mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l];
788a7e14dcfSSatish Balay         }
789a7e14dcfSSatish Balay         for (i = 0; i < mfqP->n; i++) {
790a7e14dcfSSatish Balay           mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]);
791a7e14dcfSSatish Balay           mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i];
792a7e14dcfSSatish Balay         }
793a7e14dcfSSatish Balay       }
794a7e14dcfSSatish Balay       /* Cres += work*Gres + .5*work*Hres*work';
795a7e14dcfSSatish Balay        Gres += Hres*work'; */
796a7e14dcfSSatish Balay 
797792fecdfSBarry Smith       PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione));
798ad540459SPierre Jolivet       for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] += mfqP->work2[i];
799a7e14dcfSSatish Balay       mfqP->minindex = mfqP->nHist - 1;
800a7e14dcfSSatish Balay       minnorm        = mfqP->Fres[mfqP->minindex];
8019566063dSJacob Faibussowitsch       PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res));
802a7e14dcfSSatish Balay       /* Change current center */
8039566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
804ad540459SPierre Jolivet       for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i];
8059566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint));
806a7e14dcfSSatish Balay     }
807a7e14dcfSSatish Balay 
808a7e14dcfSSatish Balay     /* Evaluate at a model-improving point if necessary */
809a7e14dcfSSatish Balay     if (valid == PETSC_FALSE) {
810a7e14dcfSSatish Balay       mfqP->q_is_I       = 1;
8118a35fac9SJason Sarich       mfqP->nmodelpoints = 0;
8129566063dSJacob Faibussowitsch       PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1));
813a7e14dcfSSatish Balay       if (mfqP->nmodelpoints < mfqP->n) {
8149566063dSJacob Faibussowitsch         PetscCall(PetscInfo(tao, "Model not valid -- model-improving\n"));
8159566063dSJacob Faibussowitsch         PetscCall(modelimprove(tao, mfqP, 1));
816a7e14dcfSSatish Balay       }
817a7e14dcfSSatish Balay     }
818a7e14dcfSSatish Balay 
819a7e14dcfSSatish Balay     /* Update the trust region radius */
820a7e14dcfSSatish Balay     deltaold = mfqP->delta;
821a7e14dcfSSatish Balay     normxsp  = 0;
822ad540459SPierre Jolivet     for (i = 0; i < mfqP->n; i++) normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i];
823a7e14dcfSSatish Balay     normxsp = PetscSqrtReal(normxsp);
824a7e14dcfSSatish Balay     if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) {
825a7e14dcfSSatish Balay       mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax);
826a7e14dcfSSatish Balay     } else if (valid == PETSC_TRUE) {
827a7e14dcfSSatish Balay       mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin);
828a7e14dcfSSatish Balay     }
829a7e14dcfSSatish Balay 
830a7e14dcfSSatish Balay     /* Compute the next interpolation set */
831a7e14dcfSSatish Balay     mfqP->q_is_I       = 1;
832a7e14dcfSSatish Balay     mfqP->nmodelpoints = 0;
8339566063dSJacob Faibussowitsch     PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1));
8349566063dSJacob Faibussowitsch     PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1));
835a7e14dcfSSatish Balay     if (mfqP->nmodelpoints == mfqP->n) {
836a7e14dcfSSatish Balay       valid = PETSC_TRUE;
837a7e14dcfSSatish Balay     } else {
838a7e14dcfSSatish Balay       valid = PETSC_FALSE;
8399566063dSJacob Faibussowitsch       PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2));
8409566063dSJacob Faibussowitsch       PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c2));
841a7e14dcfSSatish Balay       if (mfqP->n > mfqP->nmodelpoints) {
8429566063dSJacob Faibussowitsch         PetscCall(PetscInfo(tao, "Model not valid -- adding geometry points\n"));
8439566063dSJacob Faibussowitsch         PetscCall(modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints));
844a7e14dcfSSatish Balay       }
845a7e14dcfSSatish Balay     }
846ad540459SPierre Jolivet     for (i = mfqP->nmodelpoints; i > 0; i--) mfqP->model_indices[i] = mfqP->model_indices[i - 1];
847a7e14dcfSSatish Balay     mfqP->nmodelpoints++;
848a7e14dcfSSatish Balay     mfqP->model_indices[0] = mfqP->minindex;
8499566063dSJacob Faibussowitsch     PetscCall(morepoints(mfqP));
850a7e14dcfSSatish Balay     for (i = 0; i < mfqP->nmodelpoints; i++) {
8519566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x));
852ad540459SPierre Jolivet       for (j = 0; j < mfqP->n; j++) mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold;
8539566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x));
8549566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f));
855a7e14dcfSSatish Balay       for (j = 0; j < mfqP->m; j++) {
856a7e14dcfSSatish Balay         for (k = 0; k < mfqP->n; k++) {
857a7e14dcfSSatish Balay           mfqP->work[k] = 0.0;
858ad540459SPierre Jolivet           for (l = 0; l < mfqP->n; l++) mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l];
859a7e14dcfSSatish Balay         }
860792fecdfSBarry Smith         PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]);
861a7e14dcfSSatish Balay       }
8629566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f));
863a7e14dcfSSatish Balay     }
864a7e14dcfSSatish Balay 
865a7e14dcfSSatish Balay     /* Update the quadratic model */
86663a3b9bcSJacob Faibussowitsch     PetscCall(PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints));
8679566063dSJacob Faibussowitsch     PetscCall(getquadpounders(mfqP));
8689566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin));
869792fecdfSBarry Smith     PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione));
870a7e14dcfSSatish Balay     /* G = G*(delta/deltaold) + Gdel */
871a7e14dcfSSatish Balay     ratio = mfqP->delta / deltaold;
872a7e14dcfSSatish Balay     iblas = blasm * blasn;
873792fecdfSBarry Smith     PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione));
874792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione));
875125ddc8aSJason Sarich     /* H = H*(delta/deltaold)^2 + Hdel */
876a7e14dcfSSatish Balay     iblas = blasm * blasn * blasn;
877a7e14dcfSSatish Balay     ratio *= ratio;
878792fecdfSBarry Smith     PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione));
879792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione));
880a7e14dcfSSatish Balay 
881a7e14dcfSSatish Balay     /* Get residuals */
8829566063dSJacob Faibussowitsch     PetscCall(pounders_update_res(tao));
883a7e14dcfSSatish Balay 
884a7e14dcfSSatish Balay     /* Export solution and gradient residual to TAO */
8859566063dSJacob Faibussowitsch     PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution));
8869566063dSJacob Faibussowitsch     PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES));
8879566063dSJacob Faibussowitsch     PetscCall(VecAssemblyBegin(tao->gradient));
8889566063dSJacob Faibussowitsch     PetscCall(VecAssemblyEnd(tao->gradient));
8899566063dSJacob Faibussowitsch     PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
890a7e14dcfSSatish Balay     gnorm *= mfqP->delta;
891a7e14dcfSSatish Balay     /*  final criticality test */
8929566063dSJacob Faibussowitsch     PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its));
8939566063dSJacob Faibussowitsch     PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step));
894dbbe0bcdSBarry Smith     PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
895ff2b74efSJason Sarich     /* test for repeated model */
896c9376581SJason Sarich     if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) {
897c9376581SJason Sarich       same = PETSC_TRUE;
898c9376581SJason Sarich     } else {
899c9376581SJason Sarich       same = PETSC_FALSE;
900c9376581SJason Sarich     }
901ff2b74efSJason Sarich     for (i = 0; i < mfqP->nmodelpoints; i++) {
902c9376581SJason Sarich       if (same) {
903c9376581SJason Sarich         if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) {
904c9376581SJason Sarich           same = PETSC_TRUE;
905c9376581SJason Sarich         } else {
906c9376581SJason Sarich           same = PETSC_FALSE;
907c9376581SJason Sarich         }
908c9376581SJason Sarich       }
909ff2b74efSJason Sarich       mfqP->last_model_indices[i] = mfqP->model_indices[i];
910ff2b74efSJason Sarich     }
911ff2b74efSJason Sarich     mfqP->last_nmodelpoints = mfqP->nmodelpoints;
912ff2b74efSJason Sarich     if (same && mfqP->delta == deltaold) {
9139566063dSJacob Faibussowitsch       PetscCall(PetscInfo(tao, "Identical model used in successive iterations\n"));
9145d72676cSJason Sarich       tao->reason = TAO_CONVERGED_STEPTOL;
915ff2b74efSJason Sarich     }
916a7e14dcfSSatish Balay   }
9173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
918a7e14dcfSSatish Balay }
919a7e14dcfSSatish Balay 
920d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoSetUp_POUNDERS(Tao tao)
921d71ae5a4SJacob Faibussowitsch {
922a7e14dcfSSatish Balay   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
923125ddc8aSJason Sarich   PetscInt      i, j;
924a7e14dcfSSatish Balay   IS            isfloc, isfglob, isxloc, isxglob;
925a7e14dcfSSatish Balay 
926a7e14dcfSSatish Balay   PetscFunctionBegin;
9279566063dSJacob Faibussowitsch   if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
9289566063dSJacob Faibussowitsch   if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection));
9299566063dSJacob Faibussowitsch   PetscCall(VecGetSize(tao->solution, &mfqP->n));
9309566063dSJacob Faibussowitsch   PetscCall(VecGetSize(tao->ls_res, &mfqP->m));
931a7e14dcfSSatish Balay   mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n);
932606f75f6SBarry Smith   if (mfqP->npmax == PETSC_CURRENT) mfqP->npmax = 2 * mfqP->n + 1;
933a7e14dcfSSatish Balay   mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax);
934a7e14dcfSSatish Balay   mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2);
935a7e14dcfSSatish Balay 
9369566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist));
9379566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist));
938a7e14dcfSSatish Balay   for (i = 0; i < mfqP->n + 1; i++) {
9399566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[i]));
9409566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[i]));
941a7e14dcfSSatish Balay   }
9429566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(tao->solution, &mfqP->workxvec));
9439566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(tao->ls_res, &mfqP->workfvec));
944a7e14dcfSSatish Balay   mfqP->nHist = 0;
945a7e14dcfSSatish Balay 
9469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres));
9479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES));
9489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work));
9499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work2));
9509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n, &mfqP->work3));
9519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork));
9529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega));
9539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta));
9549566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n + 1, &mfqP->alpha));
955a7e14dcfSSatish Balay 
9569566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H));
9579566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q));
9589566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp));
9599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L));
9609566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp));
9619566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save));
9629566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N));
9639566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M));
9649566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z));
9659566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau));
9669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp));
967a7e14dcfSSatish Balay   mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2);
9689566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork));
9699566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork));
9709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n, &mfqP->xmin));
9719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->m, &mfqP->C));
9729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff));
9739566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp));
9749566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n, &mfqP->Gres));
9759566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres));
9769566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints));
9779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->model_indices));
9789566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices));
9799566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n, &mfqP->Xsubproblem));
9809566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel));
9819566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel));
9829566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices));
9839566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->n, &mfqP->iwork));
9849566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w));
985125ddc8aSJason Sarich   for (i = 0; i < mfqP->m; i++) {
986125ddc8aSJason Sarich     for (j = 0; j < mfqP->m; j++) {
987125ddc8aSJason Sarich       if (i == j) {
988125ddc8aSJason Sarich         mfqP->w[i + mfqP->m * j] = 1.0;
989125ddc8aSJason Sarich       } else {
990125ddc8aSJason Sarich         mfqP->w[i + mfqP->m * j] = 0.0;
991125ddc8aSJason Sarich       }
992125ddc8aSJason Sarich     }
993125ddc8aSJason Sarich   }
994ad540459SPierre Jolivet   for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) mfqP->indices[i] = i;
9959566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size));
996f40bd260SBarry Smith   if (mfqP->size > 1) {
9979566063dSJacob Faibussowitsch     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx));
9989566063dSJacob Faibussowitsch     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin));
9999566063dSJacob Faibussowitsch     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf));
10009566063dSJacob Faibussowitsch     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin));
10019566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc));
10029566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob));
10039566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc));
10049566063dSJacob Faibussowitsch     PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob));
1005a7e14dcfSSatish Balay 
10069566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx));
10079566063dSJacob Faibussowitsch     PetscCall(VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf));
1008a7e14dcfSSatish Balay 
10099566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isxloc));
10109566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isxglob));
10119566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isfloc));
10129566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&isfglob));
1013a7e14dcfSSatish Balay   }
1014a7e14dcfSSatish Balay 
1015a7e14dcfSSatish Balay   if (!mfqP->usegqt) {
1016a7e14dcfSSatish Balay     KSP ksp;
1017a7e14dcfSSatish Balay     PC  pc;
10189566063dSJacob Faibussowitsch     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx));
10199566063dSJacob Faibussowitsch     PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl));
10209566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subb));
10219566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subxu));
10229566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subpdel));
10239566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subndel));
10249566063dSJacob Faibussowitsch     PetscCall(TaoCreate(PETSC_COMM_SELF, &mfqP->subtao));
10259566063dSJacob Faibussowitsch     PetscCall(PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1));
10269566063dSJacob Faibussowitsch     PetscCall(TaoSetType(mfqP->subtao, TAOBNTR));
10279566063dSJacob Faibussowitsch     PetscCall(TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_"));
10289566063dSJacob Faibussowitsch     PetscCall(TaoSetSolution(mfqP->subtao, mfqP->subx));
10299566063dSJacob Faibussowitsch     PetscCall(TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP));
10309566063dSJacob Faibussowitsch     PetscCall(TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits));
10319566063dSJacob Faibussowitsch     PetscCall(TaoSetFromOptions(mfqP->subtao));
10329566063dSJacob Faibussowitsch     PetscCall(TaoGetKSP(mfqP->subtao, &ksp));
1033a7e14dcfSSatish Balay     if (ksp) {
10349566063dSJacob Faibussowitsch       PetscCall(KSPGetPC(ksp, &pc));
10359566063dSJacob Faibussowitsch       PetscCall(PCSetType(pc, PCNONE));
1036a7e14dcfSSatish Balay     }
10379566063dSJacob Faibussowitsch     PetscCall(TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu));
10389566063dSJacob Faibussowitsch     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH));
10399566063dSJacob Faibussowitsch     PetscCall(TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP));
1040a7e14dcfSSatish Balay   }
10413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1042a7e14dcfSSatish Balay }
1043a7e14dcfSSatish Balay 
1044d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoDestroy_POUNDERS(Tao tao)
1045d71ae5a4SJacob Faibussowitsch {
1046a7e14dcfSSatish Balay   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1047a7e14dcfSSatish Balay   PetscInt      i;
1048a7e14dcfSSatish Balay 
1049a7e14dcfSSatish Balay   PetscFunctionBegin;
1050a7e14dcfSSatish Balay   if (!mfqP->usegqt) {
10519566063dSJacob Faibussowitsch     PetscCall(TaoDestroy(&mfqP->subtao));
10529566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->subx));
10539566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->subxl));
10549566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->subxu));
10559566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->subb));
10569566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->subpdel));
10579566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->subndel));
10589566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&mfqP->subH));
1059a7e14dcfSSatish Balay   }
10609566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Fres));
10619566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->RES));
10629566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->work));
10639566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->work2));
10649566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->work3));
10659566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->mwork));
10669566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->omega));
10679566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->beta));
10689566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->alpha));
10699566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->H));
10709566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Q));
10719566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Q_tmp));
10729566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->L));
10739566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->L_tmp));
10749566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->L_save));
10759566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->N));
10769566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->M));
10779566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Z));
10789566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->tau));
10799566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->tau_tmp));
10809566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->npmaxwork));
10819566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->npmaxiwork));
10829566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->xmin));
10839566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->C));
10849566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Fdiff));
10859566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Disp));
10869566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Gres));
10879566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Hres));
10889566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Gpoints));
10899566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->model_indices));
10909566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->last_model_indices));
10919566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Xsubproblem));
10929566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Gdel));
10939566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Hdel));
10949566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->indices));
10959566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->iwork));
10969566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->w));
1097a7e14dcfSSatish Balay   for (i = 0; i < mfqP->nHist; i++) {
10989566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->Xhist[i]));
10999566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->Fhist[i]));
1100a7e14dcfSSatish Balay   }
11019566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mfqP->workxvec));
11029566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&mfqP->workfvec));
11039566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Xhist));
11049566063dSJacob Faibussowitsch   PetscCall(PetscFree(mfqP->Fhist));
1105a7e14dcfSSatish Balay 
1106f40bd260SBarry Smith   if (mfqP->size > 1) {
11079566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->localx));
11089566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->localxmin));
11099566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->localf));
11109566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&mfqP->localfmin));
1111a7e14dcfSSatish Balay   }
11129566063dSJacob Faibussowitsch   PetscCall(PetscFree(tao->data));
11133ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1114a7e14dcfSSatish Balay }
1115a7e14dcfSSatish Balay 
1116ce78bad3SBarry Smith static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems PetscOptionsObject)
1117d71ae5a4SJacob Faibussowitsch {
1118a7e14dcfSSatish Balay   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1119a7e14dcfSSatish Balay 
1120a7e14dcfSSatish Balay   PetscFunctionBegin;
1121d0609cedSBarry Smith   PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization");
11229566063dSJacob Faibussowitsch   PetscCall(PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL));
1123ff2b74efSJason Sarich   mfqP->delta = mfqP->delta0;
11249566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL));
11259566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL));
1126d0609cedSBarry Smith   PetscOptionsHeadEnd();
11273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1128a7e14dcfSSatish Balay }
1129a7e14dcfSSatish Balay 
1130d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer)
1131d71ae5a4SJacob Faibussowitsch {
1132a7e14dcfSSatish Balay   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1133a7e14dcfSSatish Balay   PetscBool     isascii;
1134a7e14dcfSSatish Balay 
1135a7e14dcfSSatish Balay   PetscFunctionBegin;
11369566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1137a7e14dcfSSatish Balay   if (isascii) {
11389566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0));
11399566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta));
114063a3b9bcSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints));
1141a7e14dcfSSatish Balay     if (mfqP->usegqt) {
11429566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n"));
1143a7e14dcfSSatish Balay     } else {
11449566063dSJacob Faibussowitsch       PetscCall(TaoView(mfqP->subtao, viewer));
1145a7e14dcfSSatish Balay     }
1146a7e14dcfSSatish Balay   }
11473ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1148a7e14dcfSSatish Balay }
11491eb8069cSJason Sarich /*MC
11501eb8069cSJason Sarich   TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares
11511eb8069cSJason Sarich 
11521eb8069cSJason Sarich   Options Database Keys:
11531eb8069cSJason Sarich + -tao_pounders_delta - initial step length
11541eb8069cSJason Sarich . -tao_pounders_npmax - maximum number of points in model
11551eb8069cSJason Sarich - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON
11561eb8069cSJason Sarich 
11571eb8069cSJason Sarich   Level: beginner
11581eb8069cSJason Sarich 
11591eb8069cSJason Sarich M*/
1160a7e14dcfSSatish Balay 
1161d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao)
1162d71ae5a4SJacob Faibussowitsch {
1163a7e14dcfSSatish Balay   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1164a7e14dcfSSatish Balay 
1165a7e14dcfSSatish Balay   PetscFunctionBegin;
1166a7e14dcfSSatish Balay   tao->ops->setup          = TaoSetUp_POUNDERS;
1167a7e14dcfSSatish Balay   tao->ops->solve          = TaoSolve_POUNDERS;
1168a7e14dcfSSatish Balay   tao->ops->view           = TaoView_POUNDERS;
1169a7e14dcfSSatish Balay   tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS;
1170a7e14dcfSSatish Balay   tao->ops->destroy        = TaoDestroy_POUNDERS;
1171a7e14dcfSSatish Balay 
11724dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&mfqP));
1173a7e14dcfSSatish Balay   tao->data = (void *)mfqP;
1174606f75f6SBarry Smith 
11756552cf8aSJason Sarich   /* Override default settings (unless already changed) */
1176606f75f6SBarry Smith   PetscCall(TaoParametersInitialize(tao));
1177606f75f6SBarry Smith   PetscObjectParameterSetDefault(tao, max_it, 2000);
1178606f75f6SBarry Smith   PetscObjectParameterSetDefault(tao, max_funcs, 4000);
1179606f75f6SBarry Smith 
1180606f75f6SBarry Smith   mfqP->npmax      = PETSC_CURRENT;
1181ff2b74efSJason Sarich   mfqP->delta0     = 0.1;
1182a7e14dcfSSatish Balay   mfqP->delta      = 0.1;
1183a7e14dcfSSatish Balay   mfqP->deltamax   = 1e3;
1184125ddc8aSJason Sarich   mfqP->deltamin   = 1e-6;
1185a8bd9cb3STodd Munson   mfqP->c2         = 10.0;
1186a7e14dcfSSatish Balay   mfqP->theta1     = 1e-5;
1187a7e14dcfSSatish Balay   mfqP->theta2     = 1e-4;
1188a7e14dcfSSatish Balay   mfqP->gamma0     = 0.5;
1189a7e14dcfSSatish Balay   mfqP->gamma1     = 2.0;
1190a7e14dcfSSatish Balay   mfqP->eta0       = 0.0;
1191a7e14dcfSSatish Balay   mfqP->eta1       = 0.1;
1192f553c655STodd Munson   mfqP->usegqt     = PETSC_FALSE;
1193a7e14dcfSSatish Balay   mfqP->gqt_rtol   = 0.001;
1194a7e14dcfSSatish Balay   mfqP->gqt_maxits = 50;
119583c8fe1dSLisandro Dalcin   mfqP->workxvec   = NULL;
11963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1197a7e14dcfSSatish Balay }
1198