1aaa7dc30SBarry Smith #include <../src/tao/leastsquares/impls/pounders/pounders.h> 2a7e14dcfSSatish Balay 3*9371c9d4SSatish Balay static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, void *ctx) { 4a7e14dcfSSatish Balay PetscFunctionBegin; 5a7e14dcfSSatish Balay PetscFunctionReturn(0); 6a7e14dcfSSatish Balay } 7ca88a2e0STodd Munson 8*9371c9d4SSatish Balay static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, void *ctx) { 9a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx; 10a7e14dcfSSatish Balay PetscReal d1, d2; 11f06e3bfaSBarry Smith 12a7e14dcfSSatish Balay PetscFunctionBegin; 13a7e14dcfSSatish Balay /* g = A*x (add b later)*/ 149566063dSJacob Faibussowitsch PetscCall(MatMult(mfqP->subH, x, g)); 15a7e14dcfSSatish Balay 16a7e14dcfSSatish Balay /* f = 1/2 * x'*(Ax) + b'*x */ 179566063dSJacob Faibussowitsch PetscCall(VecDot(x, g, &d1)); 189566063dSJacob Faibussowitsch PetscCall(VecDot(mfqP->subb, x, &d2)); 19a7e14dcfSSatish Balay *f = 0.5 * d1 + d2; 20a7e14dcfSSatish Balay 21a7e14dcfSSatish Balay /* now g = g + b */ 229566063dSJacob Faibussowitsch PetscCall(VecAXPY(g, 1.0, mfqP->subb)); 23a7e14dcfSSatish Balay PetscFunctionReturn(0); 24a7e14dcfSSatish Balay } 25ca88a2e0STodd Munson 26*9371c9d4SSatish Balay static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum) { 27125ddc8aSJason Sarich TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 28125ddc8aSJason Sarich PetscInt i, row, col; 29125ddc8aSJason Sarich PetscReal fr, fc; 30691b26d3SBarry Smith 31125ddc8aSJason Sarich PetscFunctionBegin; 329566063dSJacob Faibussowitsch PetscCall(TaoComputeResidual(tao, x, F)); 33737f463aSAlp Dener if (tao->res_weights_v) { 349566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F)); 359566063dSJacob Faibussowitsch PetscCall(VecDot(mfqP->workfvec, mfqP->workfvec, fsum)); 36737f463aSAlp Dener } else if (tao->res_weights_w) { 37125ddc8aSJason Sarich *fsum = 0; 38737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 39737f463aSAlp Dener row = tao->res_weights_rows[i]; 40737f463aSAlp Dener col = tao->res_weights_cols[i]; 419566063dSJacob Faibussowitsch PetscCall(VecGetValues(F, 1, &row, &fr)); 429566063dSJacob Faibussowitsch PetscCall(VecGetValues(F, 1, &col, &fc)); 43737f463aSAlp Dener *fsum += tao->res_weights_w[i] * fc * fr; 44125ddc8aSJason Sarich } 45125ddc8aSJason Sarich } else { 469566063dSJacob Faibussowitsch PetscCall(VecDot(F, F, fsum)); 47125ddc8aSJason Sarich } 489566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum)); 493c859ba3SBarry Smith PetscCheck(!PetscIsInfOrNanReal(*fsum), PETSC_COMM_SELF, PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 50125ddc8aSJason Sarich PetscFunctionReturn(0); 51125ddc8aSJason Sarich } 52f06e3bfaSBarry Smith 53*9371c9d4SSatish Balay static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin) { 546f4723b1SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 556f4723b1SBarry Smith PetscReal atol = 1.0e-5; 566f4723b1SBarry Smith #else 57a7e14dcfSSatish Balay PetscReal atol = 1.0e-10; 586f4723b1SBarry Smith #endif 59a7e14dcfSSatish Balay PetscInt info, its; 60a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 61a7e14dcfSSatish Balay 62a7e14dcfSSatish Balay PetscFunctionBegin; 63a7e14dcfSSatish Balay if (!mfqP->usegqt) { 64a7e14dcfSSatish Balay PetscReal maxval; 65a7e14dcfSSatish Balay PetscInt i, j; 66a7e14dcfSSatish Balay 679566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 689566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->subb)); 699566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->subb)); 70a7e14dcfSSatish Balay 719566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subx, 0.0)); 72a7e14dcfSSatish Balay 739566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subndel, -1.0)); 749566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subpdel, +1.0)); 75a7e14dcfSSatish Balay 76ce902467SBarry Smith /* Complete the lower triangle of the Hessian matrix */ 77a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 78*9371c9d4SSatish Balay for (j = i + 1; j < mfqP->n; j++) { mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i]; } 79a7e14dcfSSatish Balay } 809566063dSJacob Faibussowitsch PetscCall(MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES)); 819566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY)); 829566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY)); 83a7e14dcfSSatish Balay 849566063dSJacob Faibussowitsch PetscCall(TaoResetStatistics(mfqP->subtao)); 859566063dSJacob Faibussowitsch /* PetscCall(TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_DEFAULT)); */ 86a7e14dcfSSatish Balay /* enforce bound constraints -- experimental */ 87a7e14dcfSSatish Balay if (tao->XU && tao->XL) { 889566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XU, mfqP->subxu)); 899566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subxu, -1.0, tao->solution)); 909566063dSJacob Faibussowitsch PetscCall(VecScale(mfqP->subxu, 1.0 / mfqP->delta)); 919566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XL, mfqP->subxl)); 929566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subxl, -1.0, tao->solution)); 939566063dSJacob Faibussowitsch PetscCall(VecScale(mfqP->subxl, 1.0 / mfqP->delta)); 94a7e14dcfSSatish Balay 959566063dSJacob Faibussowitsch PetscCall(VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel)); 969566063dSJacob Faibussowitsch PetscCall(VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel)); 97a7e14dcfSSatish Balay } else { 989566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subpdel, mfqP->subxu)); 999566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subndel, mfqP->subxl)); 100a7e14dcfSSatish Balay } 101a7e14dcfSSatish Balay /* Make sure xu > xl */ 1029566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1039566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1049566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1053c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "upper bound < lower bound in subproblem"); 106a7e14dcfSSatish Balay /* Make sure xu > tao->solution > xl */ 1079566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1089566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 1099566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1103c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess < lower bound in subproblem"); 111a7e14dcfSSatish Balay 1129566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 1139566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1149566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1153c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess > upper bound in subproblem"); 116a7e14dcfSSatish Balay 1179566063dSJacob Faibussowitsch PetscCall(TaoSolve(mfqP->subtao)); 1189566063dSJacob Faibussowitsch PetscCall(TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL)); 119a7e14dcfSSatish Balay 120a7e14dcfSSatish Balay /* test bounds post-solution*/ 1219566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1229566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 1239566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 124a7e14dcfSSatish Balay if (maxval > 1e-5) { 1259566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "subproblem solution < lower bound\n")); 126a7e14dcfSSatish Balay tao->reason = TAO_DIVERGED_TR_REDUCTION; 127a7e14dcfSSatish Balay } 128a7e14dcfSSatish Balay 1299566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 1309566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1319566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 132a7e14dcfSSatish Balay if (maxval > 1e-5) { 1339566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "subproblem solution > upper bound\n")); 134a7e14dcfSSatish Balay tao->reason = TAO_DIVERGED_TR_REDUCTION; 135a7e14dcfSSatish Balay } 136a7e14dcfSSatish Balay } else { 137f06e3bfaSBarry Smith gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3); 138a7e14dcfSSatish Balay } 139a7e14dcfSSatish Balay *qmin *= -1; 140a7e14dcfSSatish Balay PetscFunctionReturn(0); 141a7e14dcfSSatish Balay } 142a7e14dcfSSatish Balay 143*9371c9d4SSatish Balay static PetscErrorCode pounders_update_res(Tao tao) { 144125ddc8aSJason Sarich TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 145125ddc8aSJason Sarich PetscInt i, row, col; 146125ddc8aSJason Sarich PetscBLASInt blasn = mfqP->n, blasn2 = blasn * blasn, blasm = mfqP->m, ione = 1; 1476c95174dSJason Sarich PetscReal zero = 0.0, one = 1.0, wii, factor; 148125ddc8aSJason Sarich 149691b26d3SBarry Smith PetscFunctionBegin; 150*9371c9d4SSatish Balay for (i = 0; i < mfqP->n; i++) { mfqP->Gres[i] = 0; } 151*9371c9d4SSatish Balay for (i = 0; i < mfqP->n * mfqP->n; i++) { mfqP->Hres[i] = 0; } 152125ddc8aSJason Sarich 153125ddc8aSJason Sarich /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */ 154737f463aSAlp Dener if (tao->res_weights_v) { 1556c95174dSJason Sarich /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */ 156125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 1579566063dSJacob Faibussowitsch PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &factor)); 1586c95174dSJason Sarich factor = factor * mfqP->C[i]; 159792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione)); 160125ddc8aSJason Sarich } 161125ddc8aSJason Sarich 162125ddc8aSJason Sarich /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 1636c95174dSJason Sarich /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/ 164125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 1659566063dSJacob Faibussowitsch PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &wii)); 166125ddc8aSJason Sarich if (tao->niter > 1) { 167125ddc8aSJason Sarich factor = wii * mfqP->C[i]; 1686c95174dSJason Sarich /* add wii * ci * Hi */ 169792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 170125ddc8aSJason Sarich } 1716c95174dSJason Sarich /* add wii * gi * gi' */ 172792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn)); 173125ddc8aSJason Sarich } 174737f463aSAlp Dener } else if (tao->res_weights_w) { 175ce902467SBarry Smith /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */ 176737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 177737f463aSAlp Dener row = tao->res_weights_rows[i]; 178737f463aSAlp Dener col = tao->res_weights_cols[i]; 179ce902467SBarry Smith 180737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 181792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione)); 182737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 183792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione)); 184ce902467SBarry Smith } 185ce902467SBarry Smith 186ce902467SBarry Smith /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 1876c95174dSJason Sarich /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 188737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 189737f463aSAlp Dener row = tao->res_weights_rows[i]; 190737f463aSAlp Dener col = tao->res_weights_cols[i]; 191737f463aSAlp Dener factor = tao->res_weights_w[i] / 2.0; 192125ddc8aSJason Sarich /* add wij * gi gj' + wij * gj gi' */ 193792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn)); 194792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn)); 195125ddc8aSJason Sarich } 196125ddc8aSJason Sarich if (tao->niter > 1) { 197737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 198737f463aSAlp Dener row = tao->res_weights_rows[i]; 199737f463aSAlp Dener col = tao->res_weights_cols[i]; 200125ddc8aSJason Sarich 201125ddc8aSJason Sarich /* add wij*cj*Hi */ 202737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 203792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione)); 204125ddc8aSJason Sarich 205125ddc8aSJason Sarich /* add wij*ci*Hj */ 206737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 207792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione)); 208125ddc8aSJason Sarich } 209125ddc8aSJason Sarich } 210125ddc8aSJason Sarich } else { 211ce902467SBarry Smith /* Default: Gres= sum_i[cigi] = G*c' */ 2129566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Identity weights\n")); 213792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione)); 214ce902467SBarry Smith 215ce902467SBarry Smith /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 2166c95174dSJason Sarich /* Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)} */ 217792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn)); 218125ddc8aSJason Sarich 219125ddc8aSJason Sarich /* sum(F(xkin,i)*H(:,:,i)) */ 220125ddc8aSJason Sarich if (tao->niter > 1) { 221125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 2226c95174dSJason Sarich factor = mfqP->C[i]; 223792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 224125ddc8aSJason Sarich } 225125ddc8aSJason Sarich } 226125ddc8aSJason Sarich } 227125ddc8aSJason Sarich PetscFunctionReturn(0); 228125ddc8aSJason Sarich } 229ca88a2e0STodd Munson 230*9371c9d4SSatish Balay static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi) { 231a7e14dcfSSatish Balay /* Phi = .5*[x(1)^2 sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */ 232a7e14dcfSSatish Balay PetscInt i, j, k; 233a7e14dcfSSatish Balay PetscReal sqrt2 = PetscSqrtReal(2.0); 234f06e3bfaSBarry Smith 235a7e14dcfSSatish Balay PetscFunctionBegin; 236a7e14dcfSSatish Balay j = 0; 237a7e14dcfSSatish Balay for (i = 0; i < n; i++) { 238a7e14dcfSSatish Balay phi[j] = 0.5 * x[i] * x[i]; 239a7e14dcfSSatish Balay j++; 240a7e14dcfSSatish Balay for (k = i + 1; k < n; k++) { 241a7e14dcfSSatish Balay phi[j] = x[i] * x[k] / sqrt2; 242a7e14dcfSSatish Balay j++; 243a7e14dcfSSatish Balay } 244a7e14dcfSSatish Balay } 245a7e14dcfSSatish Balay PetscFunctionReturn(0); 246a7e14dcfSSatish Balay } 247a7e14dcfSSatish Balay 248*9371c9d4SSatish Balay static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP) { 249a7e14dcfSSatish Balay /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x' 250a7e14dcfSSatish Balay that satisfies the interpolation conditions Q(X[:,j]) = f(j) 251a7e14dcfSSatish Balay for j=1,...,m and with a Hessian matrix of least Frobenius norm */ 252a7e14dcfSSatish Balay 253a7e14dcfSSatish Balay /* NB --we are ignoring c */ 254a7e14dcfSSatish Balay PetscInt i, j, k, num, np = mfqP->nmodelpoints; 255a7e14dcfSSatish Balay PetscReal one = 1.0, zero = 0.0, negone = -1.0; 256a7e14dcfSSatish Balay PetscBLASInt blasnpmax = mfqP->npmax; 257a7e14dcfSSatish Balay PetscBLASInt blasnplus1 = mfqP->n + 1; 258a7e14dcfSSatish Balay PetscBLASInt blasnp = np; 259a7e14dcfSSatish Balay PetscBLASInt blasint = mfqP->n * (mfqP->n + 1) / 2; 260a7e14dcfSSatish Balay PetscBLASInt blasint2 = np - mfqP->n - 1; 261f06e3bfaSBarry Smith PetscBLASInt info, ione = 1; 262a7e14dcfSSatish Balay PetscReal sqrt2 = PetscSqrtReal(2.0); 263a7e14dcfSSatish Balay 264a7e14dcfSSatish Balay PetscFunctionBegin; 265*9371c9d4SSatish Balay for (i = 0; i < mfqP->n * mfqP->m; i++) { mfqP->Gdel[i] = 0; } 266*9371c9d4SSatish Balay for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) { mfqP->Hdel[i] = 0; } 267a7e14dcfSSatish Balay 268a7e14dcfSSatish Balay /* factor M */ 269792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info)); 27063a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrf returned with value %" PetscBLASInt_FMT, info); 271a7e14dcfSSatish Balay 272a7e14dcfSSatish Balay if (np == mfqP->n + 1) { 273*9371c9d4SSatish Balay for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) { mfqP->omega[i] = 0.0; } 274*9371c9d4SSatish Balay for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) { mfqP->beta[i] = 0.0; } 275a7e14dcfSSatish Balay } else { 276a7e14dcfSSatish Balay /* Let Ltmp = (L'*L) */ 277792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint)); 278a7e14dcfSSatish Balay 279a7e14dcfSSatish Balay /* factor Ltmp */ 280792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info)); 28163a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrf returned with value %" PetscBLASInt_FMT, info); 282a7e14dcfSSatish Balay } 283a7e14dcfSSatish Balay 284a7e14dcfSSatish Balay for (k = 0; k < mfqP->m; k++) { 285a7e14dcfSSatish Balay if (np != mfqP->n + 1) { 286a7e14dcfSSatish Balay /* Solve L'*L*Omega = Z' * RESk*/ 287792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione)); 288792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info)); 28963a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrs returned with value %" PetscBLASInt_FMT, info); 290a7e14dcfSSatish Balay 291a7e14dcfSSatish Balay /* Beta = L*Omega */ 292792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione)); 293a7e14dcfSSatish Balay } 294a7e14dcfSSatish Balay 295a7e14dcfSSatish Balay /* solve M'*Alpha = RESk - N'*Beta */ 296792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione)); 297792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info)); 29863a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrs returned with value %" PetscBLASInt_FMT, info); 299a7e14dcfSSatish Balay 300a7e14dcfSSatish Balay /* Gdel(:,k) = Alpha(2:n+1) */ 301*9371c9d4SSatish Balay for (i = 0; i < mfqP->n; i++) { mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1]; } 302a7e14dcfSSatish Balay 303a7e14dcfSSatish Balay /* Set Hdels */ 304a7e14dcfSSatish Balay num = 0; 305a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 306a7e14dcfSSatish Balay /* H[i,i,k] = Beta(num) */ 307a7e14dcfSSatish Balay mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num]; 308a7e14dcfSSatish Balay num++; 309a7e14dcfSSatish Balay for (j = i + 1; j < mfqP->n; j++) { 310a7e14dcfSSatish Balay /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */ 311a7e14dcfSSatish Balay mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 312a7e14dcfSSatish Balay mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 313a7e14dcfSSatish Balay num++; 314a7e14dcfSSatish Balay } 315a7e14dcfSSatish Balay } 316a7e14dcfSSatish Balay } 317a7e14dcfSSatish Balay PetscFunctionReturn(0); 318a7e14dcfSSatish Balay } 319a7e14dcfSSatish Balay 320*9371c9d4SSatish Balay static PetscErrorCode morepoints(TAO_POUNDERS *mfqP) { 321a7e14dcfSSatish Balay /* Assumes mfqP->model_indices[0] is minimum index 322a7e14dcfSSatish Balay Finishes adding points to mfqP->model_indices (up to npmax) 323a7e14dcfSSatish Balay Computes L,Z,M,N 324a7e14dcfSSatish Balay np is actual number of points in model (should equal npmax?) */ 325a7e14dcfSSatish Balay PetscInt point, i, j, offset; 326a7e14dcfSSatish Balay PetscInt reject; 327f06e3bfaSBarry Smith PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blasnplus1 = mfqP->n + 1, info, blasnmax = mfqP->nmax, blasint, blasint2, blasnp, blasmaxmn; 3285e081366SBarry Smith const PetscReal *x; 3295e081366SBarry Smith PetscReal normd; 330a7e14dcfSSatish Balay 331f06e3bfaSBarry Smith PetscFunctionBegin; 332a7e14dcfSSatish Balay /* Initialize M,N */ 333a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 3349566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 335a7e14dcfSSatish Balay mfqP->M[(mfqP->n + 1) * i] = 1.0; 336*9371c9d4SSatish Balay for (j = 0; j < mfqP->n; j++) { mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; } 3379566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 3389566063dSJacob Faibussowitsch PetscCall(phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i])); 339a7e14dcfSSatish Balay } 340a7e14dcfSSatish Balay 341a7e14dcfSSatish Balay /* Now we add points until we have npmax starting with the most recent ones */ 342a7e14dcfSSatish Balay point = mfqP->nHist - 1; 343a7e14dcfSSatish Balay mfqP->nmodelpoints = mfqP->n + 1; 344a7e14dcfSSatish Balay while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) { 345a7e14dcfSSatish Balay /* Reject any points already in the model */ 346a7e14dcfSSatish Balay reject = 0; 347a7e14dcfSSatish Balay for (j = 0; j < mfqP->n + 1; j++) { 348a7e14dcfSSatish Balay if (point == mfqP->model_indices[j]) { 349a7e14dcfSSatish Balay reject = 1; 350a7e14dcfSSatish Balay break; 351a7e14dcfSSatish Balay } 352a7e14dcfSSatish Balay } 353a7e14dcfSSatish Balay 354a7e14dcfSSatish Balay /* Reject if norm(d) >c2 */ 355a7e14dcfSSatish Balay if (!reject) { 3569566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[point], mfqP->workxvec)); 3579566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex])); 3589566063dSJacob Faibussowitsch PetscCall(VecNorm(mfqP->workxvec, NORM_2, &normd)); 359a7e14dcfSSatish Balay normd /= mfqP->delta; 360*9371c9d4SSatish Balay if (normd > mfqP->c2) { reject = 1; } 361a7e14dcfSSatish Balay } 362f06e3bfaSBarry Smith if (reject) { 363a7e14dcfSSatish Balay point--; 364a7e14dcfSSatish Balay continue; 365a7e14dcfSSatish Balay } 366a7e14dcfSSatish Balay 3679566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[point], &x)); 368a7e14dcfSSatish Balay mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0; 369*9371c9d4SSatish Balay for (j = 0; j < mfqP->n; j++) { mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; } 3709566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[point], &x)); 3719566063dSJacob Faibussowitsch PetscCall(phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)])); 372a7e14dcfSSatish Balay 373a7e14dcfSSatish Balay /* Update QR factorization */ 374a7e14dcfSSatish Balay /* Copy M' to Q_tmp */ 375a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 376*9371c9d4SSatish Balay for (j = 0; j < mfqP->npmax; j++) { mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j]; } 377a7e14dcfSSatish Balay } 378a7e14dcfSSatish Balay blasnp = mfqP->nmodelpoints + 1; 379a7e14dcfSSatish Balay /* Q_tmp,R = qr(M') */ 380a7e14dcfSSatish Balay blasmaxmn = PetscMax(mfqP->m, mfqP->n + 1); 381792fecdfSBarry Smith PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info)); 38263a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine geqrf returned with value %" PetscBLASInt_FMT, info); 383a7e14dcfSSatish Balay 384a7e14dcfSSatish Balay /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */ 385a7e14dcfSSatish Balay /* L = N*Qtmp */ 386a7e14dcfSSatish Balay blasint2 = mfqP->n * (mfqP->n + 1) / 2; 387a7e14dcfSSatish Balay /* Copy N to L_tmp */ 388*9371c9d4SSatish Balay for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) { mfqP->L_tmp[i] = mfqP->N[i]; } 389a7e14dcfSSatish Balay /* Copy L_save to L_tmp */ 390a7e14dcfSSatish Balay 391a7e14dcfSSatish Balay /* L_tmp = N*Qtmp' */ 392792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info)); 39363a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 394a7e14dcfSSatish Balay 395a7e14dcfSSatish Balay /* Copy L_tmp to L_save */ 396*9371c9d4SSatish Balay for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) { mfqP->L_save[i] = mfqP->L_tmp[i]; } 397a7e14dcfSSatish Balay 398a7e14dcfSSatish Balay /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */ 399a7e14dcfSSatish Balay blasint = mfqP->nmodelpoints - mfqP->n; 4009566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 401792fecdfSBarry Smith PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info)); 4029566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 40363a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine gesvd returned with value %" PetscBLASInt_FMT, info); 404a7e14dcfSSatish Balay 405a7e14dcfSSatish Balay if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) { 406a7e14dcfSSatish Balay /* accept point */ 407a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = point; 408a7e14dcfSSatish Balay /* Copy Q_tmp to Q */ 409*9371c9d4SSatish Balay for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) { mfqP->Q[i] = mfqP->Q_tmp[i]; } 410*9371c9d4SSatish Balay for (i = 0; i < mfqP->npmax; i++) { mfqP->tau[i] = mfqP->tau_tmp[i]; } 411a7e14dcfSSatish Balay mfqP->nmodelpoints++; 4121f4a0ff5SJason Sarich blasnp = mfqP->nmodelpoints; 413a7e14dcfSSatish Balay 414a7e14dcfSSatish Balay /* Copy L_save to L */ 415*9371c9d4SSatish Balay for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) { mfqP->L[i] = mfqP->L_save[i]; } 416a7e14dcfSSatish Balay } 417a7e14dcfSSatish Balay point--; 418a7e14dcfSSatish Balay } 419a7e14dcfSSatish Balay 420a7e14dcfSSatish Balay blasnp = mfqP->nmodelpoints; 421a7e14dcfSSatish Balay /* Copy Q(:,n+2:np) to Z */ 422a7e14dcfSSatish Balay /* First set Q_tmp to I */ 423*9371c9d4SSatish Balay for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) { mfqP->Q_tmp[i] = 0.0; } 424*9371c9d4SSatish Balay for (i = 0; i < mfqP->npmax; i++) { mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0; } 425a7e14dcfSSatish Balay 426a7e14dcfSSatish Balay /* Q_tmp = I * Q */ 427792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 42863a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 429a7e14dcfSSatish Balay 430a7e14dcfSSatish Balay /* Copy Q_tmp(:,n+2:np) to Z) */ 431a7e14dcfSSatish Balay offset = mfqP->npmax * (mfqP->n + 1); 432*9371c9d4SSatish Balay for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) { mfqP->Z[i - offset] = mfqP->Q_tmp[i]; } 433a7e14dcfSSatish Balay 434a7e14dcfSSatish Balay if (mfqP->nmodelpoints == mfqP->n + 1) { 435a7e14dcfSSatish Balay /* Set L to I_{n+1} */ 436*9371c9d4SSatish Balay for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) { mfqP->L[i] = 0.0; } 437*9371c9d4SSatish Balay for (i = 0; i < mfqP->n; i++) { mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0; } 438a7e14dcfSSatish Balay } 439a7e14dcfSSatish Balay PetscFunctionReturn(0); 440a7e14dcfSSatish Balay } 441a7e14dcfSSatish Balay 442a7e14dcfSSatish Balay /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */ 443*9371c9d4SSatish Balay static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index) { 444a7e14dcfSSatish Balay PetscFunctionBegin; 445a7e14dcfSSatish Balay /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/ 4469566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist])); 4479566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES)); 4489566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 4499566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 4509566063dSJacob Faibussowitsch PetscCall(VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex])); 451a7e14dcfSSatish Balay 452a7e14dcfSSatish Balay /* Project into feasible region */ 453*9371c9d4SSatish Balay if (tao->XU && tao->XL) { PetscCall(VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist])); } 454a7e14dcfSSatish Balay 455a7e14dcfSSatish Balay /* Compute value of new vector */ 4569566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist])); 457a7e14dcfSSatish Balay CHKMEMQ; 4589566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 459a7e14dcfSSatish Balay 460a7e14dcfSSatish Balay /* Add new vector to model */ 461a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist; 462a7e14dcfSSatish Balay mfqP->nmodelpoints++; 463a7e14dcfSSatish Balay mfqP->nHist++; 464a7e14dcfSSatish Balay PetscFunctionReturn(0); 465a7e14dcfSSatish Balay } 466a7e14dcfSSatish Balay 467*9371c9d4SSatish Balay static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints) { 468a7e14dcfSSatish Balay /* modeld = Q(:,np+1:n)' */ 469a7e14dcfSSatish Balay PetscInt i, j, minindex = 0; 470e270355aSBarry Smith PetscReal dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY; 471a7e14dcfSSatish Balay PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blask, info; 472a7e14dcfSSatish Balay PetscBLASInt blas1 = 1, blasnmax = mfqP->nmax; 473f06e3bfaSBarry Smith 474362febeeSStefano Zampini PetscFunctionBegin; 475a7e14dcfSSatish Balay blask = mfqP->nmodelpoints; 476a7e14dcfSSatish Balay /* Qtmp = I(n x n) */ 477a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 478*9371c9d4SSatish Balay for (j = 0; j < mfqP->n; j++) { mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0; } 479a7e14dcfSSatish Balay } 480*9371c9d4SSatish Balay for (j = 0; j < mfqP->n; j++) { mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0; } 481a7e14dcfSSatish Balay 482a7e14dcfSSatish Balay /* Qtmp = Q * I */ 483792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 484a7e14dcfSSatish Balay 485a7e14dcfSSatish Balay for (i = mfqP->nmodelpoints; i < mfqP->n; i++) { 486792fecdfSBarry Smith PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1)); 487a7e14dcfSSatish Balay if (dp > 0.0) { /* Model says use the other direction! */ 488*9371c9d4SSatish Balay for (j = 0; j < mfqP->n; j++) { mfqP->Q_tmp[i * mfqP->npmax + j] *= -1; } 489a7e14dcfSSatish Balay } 490a7e14dcfSSatish Balay /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */ 491*9371c9d4SSatish Balay for (j = 0; j < mfqP->n; j++) { mfqP->work2[j] = mfqP->Gres[j]; } 492792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1)); 493792fecdfSBarry Smith PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1)); 494a7e14dcfSSatish Balay if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) { 495a7e14dcfSSatish Balay minindex = i; 496a7e14dcfSSatish Balay minvalue = mfqP->work[i]; 497a7e14dcfSSatish Balay } 498*9371c9d4SSatish Balay if (addallpoints != 0) { PetscCall(addpoint(tao, mfqP, i)); } 499a7e14dcfSSatish Balay } 500*9371c9d4SSatish Balay if (!addallpoints) { PetscCall(addpoint(tao, mfqP, minindex)); } 501a7e14dcfSSatish Balay PetscFunctionReturn(0); 502a7e14dcfSSatish Balay } 503a7e14dcfSSatish Balay 504*9371c9d4SSatish Balay static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c) { 505a7e14dcfSSatish Balay PetscInt i, j; 506a7e14dcfSSatish Balay PetscBLASInt blasm = mfqP->m, blasj, blask, blasn = mfqP->n, ione = 1, info; 507a7e14dcfSSatish Balay PetscBLASInt blasnpmax = mfqP->npmax, blasmaxmn; 508a7e14dcfSSatish Balay PetscReal proj, normd; 5095e081366SBarry Smith const PetscReal *x; 510a7e14dcfSSatish Balay 511f06e3bfaSBarry Smith PetscFunctionBegin; 512a7e14dcfSSatish Balay for (i = mfqP->nHist - 1; i >= 0; i--) { 5139566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[i], &x)); 514*9371c9d4SSatish Balay for (j = 0; j < mfqP->n; j++) { mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta; } 5159566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[i], &x)); 516792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione)); 517792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione)); 518a8bd9cb3STodd Munson if (normd <= c) { 51929c6e7daSBarry Smith blasj = PetscMax((mfqP->n - mfqP->nmodelpoints), 0); 520a7e14dcfSSatish Balay if (!mfqP->q_is_I) { 521a7e14dcfSSatish Balay /* project D onto null */ 522a7e14dcfSSatish Balay blask = (mfqP->nmodelpoints); 523792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info)); 52463a3b9bcSJacob Faibussowitsch PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "ormqr returned value %" PetscBLASInt_FMT, info); 525a7e14dcfSSatish Balay } 526792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione)); 527a7e14dcfSSatish Balay 528a7e14dcfSSatish Balay if (proj >= mfqP->theta1) { /* add this index to model */ 529a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = i; 530a7e14dcfSSatish Balay mfqP->nmodelpoints++; 531792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione)); 532a7e14dcfSSatish Balay blask = mfqP->npmax * (mfqP->nmodelpoints); 533792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione)); 534a7e14dcfSSatish Balay blask = mfqP->nmodelpoints; 535a7e14dcfSSatish Balay blasmaxmn = PetscMax(mfqP->m, mfqP->n); 536792fecdfSBarry Smith PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info)); 53763a3b9bcSJacob Faibussowitsch PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "geqrf returned value %" PetscBLASInt_FMT, info); 538a7e14dcfSSatish Balay mfqP->q_is_I = 0; 539a7e14dcfSSatish Balay } 540*9371c9d4SSatish Balay if (mfqP->nmodelpoints == mfqP->n) { break; } 541a7e14dcfSSatish Balay } 542a7e14dcfSSatish Balay } 543babe810cSJason Sarich 544a7e14dcfSSatish Balay PetscFunctionReturn(0); 545a7e14dcfSSatish Balay } 546f06e3bfaSBarry Smith 547*9371c9d4SSatish Balay static PetscErrorCode TaoSolve_POUNDERS(Tao tao) { 548a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 5498931d482SJason Sarich PetscInt i, ii, j, k, l; 550a7e14dcfSSatish Balay PetscReal step = 1.0; 551a7e14dcfSSatish Balay PetscInt low, high; 552a7e14dcfSSatish Balay PetscReal minnorm; 5535e081366SBarry Smith PetscReal *x, *f; 5545e081366SBarry Smith const PetscReal *xmint, *fmin; 555bd026e97SJed Brown PetscReal deltaold; 556a7e14dcfSSatish Balay PetscReal gnorm; 557a7e14dcfSSatish Balay PetscBLASInt info, ione = 1, iblas; 558ff2b74efSJason Sarich PetscBool valid, same; 559a7e14dcfSSatish Balay PetscReal mdec, rho, normxsp; 560a7e14dcfSSatish Balay PetscReal one = 1.0, zero = 0.0, ratio; 561125ddc8aSJason Sarich PetscBLASInt blasm, blasn, blasncopy, blasnpmax; 562a7bbbda8SBarry Smith static PetscBool set = PETSC_FALSE; 563a7e14dcfSSatish Balay 564a7e14dcfSSatish Balay /* n = # of parameters 565a7e14dcfSSatish Balay m = dimension (components) of function */ 566a7e14dcfSSatish Balay PetscFunctionBegin; 5679566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister("@article{UNEDF0,\n" 568a7bbbda8SBarry Smith "title = {Nuclear energy density optimization},\n" 569a7bbbda8SBarry Smith "author = {Kortelainen, M. and Lesinski, T. and Mor\'e, J. and Nazarewicz, W.\n" 570a7bbbda8SBarry Smith " and Sarich, J. and Schunck, N. and Stoitsov, M. V. and Wild, S. },\n" 571a7bbbda8SBarry Smith "journal = {Phys. Rev. C},\n" 572a7bbbda8SBarry Smith "volume = {82},\n" 573a7bbbda8SBarry Smith "number = {2},\n" 574a7bbbda8SBarry Smith "pages = {024313},\n" 575a7bbbda8SBarry Smith "numpages = {18},\n" 576a7bbbda8SBarry Smith "year = {2010},\n" 577a7bbbda8SBarry Smith "month = {Aug},\n" 578*9371c9d4SSatish Balay "doi = {10.1103/PhysRevC.82.024313}\n}\n", 579*9371c9d4SSatish Balay &set)); 580e6d4cb7fSJason Sarich tao->niter = 0; 581a7e14dcfSSatish Balay if (tao->XL && tao->XU) { 582a7e14dcfSSatish Balay /* Check x0 <= XU */ 583a7e14dcfSSatish Balay PetscReal val; 584ce902467SBarry Smith 5859566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 5869566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 5879566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 5883c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 > upper bound"); 589a7e14dcfSSatish Balay 590a7e14dcfSSatish Balay /* Check x0 >= xl */ 5919566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XL, mfqP->Xhist[0])); 5929566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->solution)); 5939566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 5943c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 < lower bound"); 595f06e3bfaSBarry Smith 596a7e14dcfSSatish Balay /* Check x0 + delta < XU -- should be able to get around this eventually */ 597a7e14dcfSSatish Balay 5989566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->Xhist[0], mfqP->delta)); 5999566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], 1.0, tao->solution)); 6009566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 6019566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 6023c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 + delta > upper bound"); 603a7e14dcfSSatish Balay } 604a7e14dcfSSatish Balay 605*9371c9d4SSatish Balay blasm = mfqP->m; 606*9371c9d4SSatish Balay blasn = mfqP->n; 607*9371c9d4SSatish Balay blasnpmax = mfqP->npmax; 608ce902467SBarry Smith for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0; 609a7e14dcfSSatish Balay 6109566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 611ce902467SBarry Smith 612ce902467SBarry Smith /* This provides enough information to approximate the gradient of the objective */ 613ce902467SBarry Smith /* using a forward difference scheme. */ 614ce902467SBarry Smith 6159566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta)); 6169566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0])); 617a7e14dcfSSatish Balay mfqP->minindex = 0; 618a7e14dcfSSatish Balay minnorm = mfqP->Fres[0]; 619ff2b74efSJason Sarich 6209566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(mfqP->Xhist[0], &low, &high)); 621805332fbSTodd Munson for (i = 1; i < mfqP->n + 1; ++i) { 6229566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[0], mfqP->Xhist[i])); 623ce902467SBarry Smith 624a7e14dcfSSatish Balay if (i - 1 >= low && i - 1 < high) { 6259566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 626a7e14dcfSSatish Balay x[i - 1 - low] += mfqP->delta; 6279566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 628a7e14dcfSSatish Balay } 629a7e14dcfSSatish Balay CHKMEMQ; 6309566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i])); 631a7e14dcfSSatish Balay if (mfqP->Fres[i] < minnorm) { 632a7e14dcfSSatish Balay mfqP->minindex = i; 633a7e14dcfSSatish Balay minnorm = mfqP->Fres[i]; 634a7e14dcfSSatish Balay } 635a7e14dcfSSatish Balay } 6369566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 6379566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 6389566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm)); 639ce902467SBarry Smith 640a7e14dcfSSatish Balay /* Gather mpi vecs to one big local vec */ 641a7e14dcfSSatish Balay 642a7e14dcfSSatish Balay /* Begin serial code */ 643a7e14dcfSSatish Balay 644a7e14dcfSSatish Balay /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 645a7e14dcfSSatish Balay /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 646a7e14dcfSSatish Balay /* (Column oriented for blas calls) */ 647a7e14dcfSSatish Balay ii = 0; 648a7e14dcfSSatish Balay 64963a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Build matrix: %" PetscInt_FMT "\n", (PetscInt)mfqP->size)); 650805332fbSTodd Munson if (1 == mfqP->size) { 6519566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 652a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 6539566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 6549566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 655a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 656a7e14dcfSSatish Balay if (i == mfqP->minindex) continue; 657a7e14dcfSSatish Balay 6589566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 659*9371c9d4SSatish Balay for (j = 0; j < mfqP->n; j++) { mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; } 6609566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 661a7e14dcfSSatish Balay 6629566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Fhist[i], &f)); 663*9371c9d4SSatish Balay for (j = 0; j < mfqP->m; j++) { mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; } 6649566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Fhist[i], &f)); 66594e8dccaSTodd Munson 666a7e14dcfSSatish Balay mfqP->model_indices[ii++] = i; 667a7e14dcfSSatish Balay } 668*9371c9d4SSatish Balay for (j = 0; j < mfqP->m; j++) { mfqP->C[j] = fmin[j]; } 6699566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 670a7e14dcfSSatish Balay } else { 6719566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->localxmin, 0)); 6729566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 6739566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 674ce902467SBarry Smith 6759566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->localxmin, &xmint)); 676a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 6779566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->localxmin, &xmint)); 678a7e14dcfSSatish Balay 6799566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 6809566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 6819566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->localfmin, &fmin)); 682a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 683a7e14dcfSSatish Balay if (i == mfqP->minindex) continue; 684a7e14dcfSSatish Balay 6859566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 6869566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 6879566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->localx, &x)); 688*9371c9d4SSatish Balay for (j = 0; j < mfqP->n; j++) { mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; } 6899566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->localx, &x)); 690a7e14dcfSSatish Balay 6919566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 6929566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 6939566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->localf, &f)); 694*9371c9d4SSatish Balay for (j = 0; j < mfqP->m; j++) { mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; } 6959566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->localf, &f)); 69694e8dccaSTodd Munson 69794e8dccaSTodd Munson mfqP->model_indices[ii++] = i; 698a7e14dcfSSatish Balay } 699*9371c9d4SSatish Balay for (j = 0; j < mfqP->m; j++) { mfqP->C[j] = fmin[j]; } 7009566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->localfmin, &fmin)); 701a7e14dcfSSatish Balay } 702a7e14dcfSSatish Balay 703a7e14dcfSSatish Balay /* Determine the initial quadratic models */ 704a7e14dcfSSatish Balay /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */ 705a7e14dcfSSatish Balay /* D (nxn) Fdiff (nxm) => G (nxm) */ 706125ddc8aSJason Sarich blasncopy = blasn; 707792fecdfSBarry Smith PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info)); 70863a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Linear solve return: %" PetscInt_FMT "\n", (PetscInt)info)); 709a7e14dcfSSatish Balay 7109566063dSJacob Faibussowitsch PetscCall(pounders_update_res(tao)); 711a7e14dcfSSatish Balay 712a7e14dcfSSatish Balay valid = PETSC_TRUE; 713a7e14dcfSSatish Balay 7149566063dSJacob Faibussowitsch PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 7159566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(tao->gradient)); 7169566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(tao->gradient)); 7179566063dSJacob Faibussowitsch PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 718a7e14dcfSSatish Balay gnorm *= mfqP->delta; 7199566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 720763847b4SAlp Dener 721763847b4SAlp Dener tao->reason = TAO_CONTINUE_ITERATING; 7229566063dSJacob Faibussowitsch PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 7239566063dSJacob Faibussowitsch PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 724dbbe0bcdSBarry Smith PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 725763847b4SAlp Dener 726a7e14dcfSSatish Balay mfqP->nHist = mfqP->n + 1; 727a7e14dcfSSatish Balay mfqP->nmodelpoints = mfqP->n + 1; 7289566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm)); 729a7e14dcfSSatish Balay 730763847b4SAlp Dener while (tao->reason == TAO_CONTINUE_ITERATING) { 731a7e14dcfSSatish Balay PetscReal gnm = 1e-4; 732e1e80dc8SAlp Dener /* Call general purpose update function */ 733dbbe0bcdSBarry Smith PetscTryTypeMethod(tao, update, tao->niter, tao->user_update); 7348931d482SJason Sarich tao->niter++; 735ce902467SBarry Smith /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */ 7369566063dSJacob Faibussowitsch PetscCall(gqtwrap(tao, &gnm, &mdec)); 737a7e14dcfSSatish Balay /* Evaluate the function at the new point */ 738a7e14dcfSSatish Balay 739*9371c9d4SSatish Balay for (i = 0; i < mfqP->n; i++) { mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i]; } 7409566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist])); 7419566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist])); 7429566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES)); 7439566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 7449566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 745a7e14dcfSSatish Balay 7469566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 747125ddc8aSJason Sarich 748a7e14dcfSSatish Balay rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec; 749a7e14dcfSSatish Balay mfqP->nHist++; 750a7e14dcfSSatish Balay 751a7e14dcfSSatish Balay /* Update the center */ 752a7e14dcfSSatish Balay if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) { 753a7e14dcfSSatish Balay /* Update model to reflect new base point */ 754*9371c9d4SSatish Balay for (i = 0; i < mfqP->n; i++) { mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta; } 755a7e14dcfSSatish Balay for (j = 0; j < mfqP->m; j++) { 756a7e14dcfSSatish Balay /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work'; 757a7e14dcfSSatish Balay G(:,j) = G(:,j) + H(:,:,j)*work' */ 758a7e14dcfSSatish Balay for (k = 0; k < mfqP->n; k++) { 759a7e14dcfSSatish Balay mfqP->work2[k] = 0.0; 760*9371c9d4SSatish Balay for (l = 0; l < mfqP->n; l++) { mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l]; } 761a7e14dcfSSatish Balay } 762a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 763a7e14dcfSSatish Balay mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]); 764a7e14dcfSSatish Balay mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i]; 765a7e14dcfSSatish Balay } 766a7e14dcfSSatish Balay } 767a7e14dcfSSatish Balay /* Cres += work*Gres + .5*work*Hres*work'; 768a7e14dcfSSatish Balay Gres += Hres*work'; */ 769a7e14dcfSSatish Balay 770792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione)); 771*9371c9d4SSatish Balay for (i = 0; i < mfqP->n; i++) { mfqP->Gres[i] += mfqP->work2[i]; } 772a7e14dcfSSatish Balay mfqP->minindex = mfqP->nHist - 1; 773a7e14dcfSSatish Balay minnorm = mfqP->Fres[mfqP->minindex]; 7749566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 775a7e14dcfSSatish Balay /* Change current center */ 7769566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 777*9371c9d4SSatish Balay for (i = 0; i < mfqP->n; i++) { mfqP->xmin[i] = xmint[i]; } 7789566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 779a7e14dcfSSatish Balay } 780a7e14dcfSSatish Balay 781a7e14dcfSSatish Balay /* Evaluate at a model-improving point if necessary */ 782a7e14dcfSSatish Balay if (valid == PETSC_FALSE) { 783a7e14dcfSSatish Balay mfqP->q_is_I = 1; 7848a35fac9SJason Sarich mfqP->nmodelpoints = 0; 7859566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 786a7e14dcfSSatish Balay if (mfqP->nmodelpoints < mfqP->n) { 7879566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Model not valid -- model-improving\n")); 7889566063dSJacob Faibussowitsch PetscCall(modelimprove(tao, mfqP, 1)); 789a7e14dcfSSatish Balay } 790a7e14dcfSSatish Balay } 791a7e14dcfSSatish Balay 792a7e14dcfSSatish Balay /* Update the trust region radius */ 793a7e14dcfSSatish Balay deltaold = mfqP->delta; 794a7e14dcfSSatish Balay normxsp = 0; 795*9371c9d4SSatish Balay for (i = 0; i < mfqP->n; i++) { normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i]; } 796a7e14dcfSSatish Balay normxsp = PetscSqrtReal(normxsp); 797a7e14dcfSSatish Balay if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) { 798a7e14dcfSSatish Balay mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax); 799a7e14dcfSSatish Balay } else if (valid == PETSC_TRUE) { 800a7e14dcfSSatish Balay mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin); 801a7e14dcfSSatish Balay } 802a7e14dcfSSatish Balay 803a7e14dcfSSatish Balay /* Compute the next interpolation set */ 804a7e14dcfSSatish Balay mfqP->q_is_I = 1; 805a7e14dcfSSatish Balay mfqP->nmodelpoints = 0; 8069566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1)); 8079566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 808a7e14dcfSSatish Balay if (mfqP->nmodelpoints == mfqP->n) { 809a7e14dcfSSatish Balay valid = PETSC_TRUE; 810a7e14dcfSSatish Balay } else { 811a7e14dcfSSatish Balay valid = PETSC_FALSE; 8129566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2)); 8139566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c2)); 814a7e14dcfSSatish Balay if (mfqP->n > mfqP->nmodelpoints) { 8159566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Model not valid -- adding geometry points\n")); 8169566063dSJacob Faibussowitsch PetscCall(modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints)); 817a7e14dcfSSatish Balay } 818a7e14dcfSSatish Balay } 819*9371c9d4SSatish Balay for (i = mfqP->nmodelpoints; i > 0; i--) { mfqP->model_indices[i] = mfqP->model_indices[i - 1]; } 820a7e14dcfSSatish Balay mfqP->nmodelpoints++; 821a7e14dcfSSatish Balay mfqP->model_indices[0] = mfqP->minindex; 8229566063dSJacob Faibussowitsch PetscCall(morepoints(mfqP)); 823a7e14dcfSSatish Balay for (i = 0; i < mfqP->nmodelpoints; i++) { 8249566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 825*9371c9d4SSatish Balay for (j = 0; j < mfqP->n; j++) { mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold; } 8269566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 8279566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 828a7e14dcfSSatish Balay for (j = 0; j < mfqP->m; j++) { 829a7e14dcfSSatish Balay for (k = 0; k < mfqP->n; k++) { 830a7e14dcfSSatish Balay mfqP->work[k] = 0.0; 831*9371c9d4SSatish Balay for (l = 0; l < mfqP->n; l++) { mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l]; } 832a7e14dcfSSatish Balay } 833792fecdfSBarry Smith PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]); 834a7e14dcfSSatish Balay } 8359566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 836a7e14dcfSSatish Balay } 837a7e14dcfSSatish Balay 838a7e14dcfSSatish Balay /* Update the quadratic model */ 83963a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints)); 8409566063dSJacob Faibussowitsch PetscCall(getquadpounders(mfqP)); 8419566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 842792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione)); 843a7e14dcfSSatish Balay /* G = G*(delta/deltaold) + Gdel */ 844a7e14dcfSSatish Balay ratio = mfqP->delta / deltaold; 845a7e14dcfSSatish Balay iblas = blasm * blasn; 846792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione)); 847792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione)); 848125ddc8aSJason Sarich /* H = H*(delta/deltaold)^2 + Hdel */ 849a7e14dcfSSatish Balay iblas = blasm * blasn * blasn; 850a7e14dcfSSatish Balay ratio *= ratio; 851792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione)); 852792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione)); 853a7e14dcfSSatish Balay 854a7e14dcfSSatish Balay /* Get residuals */ 8559566063dSJacob Faibussowitsch PetscCall(pounders_update_res(tao)); 856a7e14dcfSSatish Balay 857a7e14dcfSSatish Balay /* Export solution and gradient residual to TAO */ 8589566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 8599566063dSJacob Faibussowitsch PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 8609566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(tao->gradient)); 8619566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(tao->gradient)); 8629566063dSJacob Faibussowitsch PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 863a7e14dcfSSatish Balay gnorm *= mfqP->delta; 864a7e14dcfSSatish Balay /* final criticality test */ 8659566063dSJacob Faibussowitsch PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 8669566063dSJacob Faibussowitsch PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 867dbbe0bcdSBarry Smith PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 868ff2b74efSJason Sarich /* test for repeated model */ 869c9376581SJason Sarich if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) { 870c9376581SJason Sarich same = PETSC_TRUE; 871c9376581SJason Sarich } else { 872c9376581SJason Sarich same = PETSC_FALSE; 873c9376581SJason Sarich } 874ff2b74efSJason Sarich for (i = 0; i < mfqP->nmodelpoints; i++) { 875c9376581SJason Sarich if (same) { 876c9376581SJason Sarich if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) { 877c9376581SJason Sarich same = PETSC_TRUE; 878c9376581SJason Sarich } else { 879c9376581SJason Sarich same = PETSC_FALSE; 880c9376581SJason Sarich } 881c9376581SJason Sarich } 882ff2b74efSJason Sarich mfqP->last_model_indices[i] = mfqP->model_indices[i]; 883ff2b74efSJason Sarich } 884ff2b74efSJason Sarich mfqP->last_nmodelpoints = mfqP->nmodelpoints; 885ff2b74efSJason Sarich if (same && mfqP->delta == deltaold) { 8869566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Identical model used in successive iterations\n")); 8875d72676cSJason Sarich tao->reason = TAO_CONVERGED_STEPTOL; 888ff2b74efSJason Sarich } 889a7e14dcfSSatish Balay } 890a7e14dcfSSatish Balay PetscFunctionReturn(0); 891a7e14dcfSSatish Balay } 892a7e14dcfSSatish Balay 893*9371c9d4SSatish Balay static PetscErrorCode TaoSetUp_POUNDERS(Tao tao) { 894a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 895125ddc8aSJason Sarich PetscInt i, j; 896a7e14dcfSSatish Balay IS isfloc, isfglob, isxloc, isxglob; 897a7e14dcfSSatish Balay 898a7e14dcfSSatish Balay PetscFunctionBegin; 8999566063dSJacob Faibussowitsch if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient)); 9009566063dSJacob Faibussowitsch if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection)); 9019566063dSJacob Faibussowitsch PetscCall(VecGetSize(tao->solution, &mfqP->n)); 9029566063dSJacob Faibussowitsch PetscCall(VecGetSize(tao->ls_res, &mfqP->m)); 903a7e14dcfSSatish Balay mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n); 904*9371c9d4SSatish Balay if (mfqP->npmax == PETSC_DEFAULT) { mfqP->npmax = 2 * mfqP->n + 1; } 905a7e14dcfSSatish Balay mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax); 906a7e14dcfSSatish Balay mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2); 907a7e14dcfSSatish Balay 9089566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist)); 9099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist)); 910a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 9119566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[i])); 9129566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[i])); 913a7e14dcfSSatish Balay } 9149566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->workxvec)); 9159566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->workfvec)); 916a7e14dcfSSatish Balay mfqP->nHist = 0; 917a7e14dcfSSatish Balay 9189566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres)); 9199566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES)); 9209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work)); 9219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work2)); 9229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work3)); 9239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork)); 9249566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega)); 9259566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta)); 9269566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n + 1, &mfqP->alpha)); 927a7e14dcfSSatish Balay 9289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H)); 9299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q)); 9309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp)); 9319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L)); 9329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp)); 9339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save)); 9349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N)); 9359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M)); 9369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z)); 9379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau)); 9389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp)); 939a7e14dcfSSatish Balay mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2); 9409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork)); 9419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork)); 9429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->xmin)); 9439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m, &mfqP->C)); 9449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff)); 9459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp)); 9469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->Gres)); 9479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres)); 9489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints)); 9499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->model_indices)); 9509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices)); 9519566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->Xsubproblem)); 9529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel)); 9539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel)); 9549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices)); 9559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->iwork)); 9569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w)); 957125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 958125ddc8aSJason Sarich for (j = 0; j < mfqP->m; j++) { 959125ddc8aSJason Sarich if (i == j) { 960125ddc8aSJason Sarich mfqP->w[i + mfqP->m * j] = 1.0; 961125ddc8aSJason Sarich } else { 962125ddc8aSJason Sarich mfqP->w[i + mfqP->m * j] = 0.0; 963125ddc8aSJason Sarich } 964125ddc8aSJason Sarich } 965125ddc8aSJason Sarich } 966*9371c9d4SSatish Balay for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) { mfqP->indices[i] = i; } 9679566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size)); 968f40bd260SBarry Smith if (mfqP->size > 1) { 9699566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx)); 9709566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin)); 9719566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf)); 9729566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin)); 9739566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc)); 9749566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob)); 9759566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc)); 9769566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob)); 977a7e14dcfSSatish Balay 9789566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx)); 9799566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf)); 980a7e14dcfSSatish Balay 9819566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isxloc)); 9829566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isxglob)); 9839566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isfloc)); 9849566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isfglob)); 985a7e14dcfSSatish Balay } 986a7e14dcfSSatish Balay 987a7e14dcfSSatish Balay if (!mfqP->usegqt) { 988a7e14dcfSSatish Balay KSP ksp; 989a7e14dcfSSatish Balay PC pc; 9909566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx)); 9919566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl)); 9929566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subb)); 9939566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subxu)); 9949566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subpdel)); 9959566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subndel)); 9969566063dSJacob Faibussowitsch PetscCall(TaoCreate(PETSC_COMM_SELF, &mfqP->subtao)); 9979566063dSJacob Faibussowitsch PetscCall(PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1)); 9989566063dSJacob Faibussowitsch PetscCall(TaoSetType(mfqP->subtao, TAOBNTR)); 9999566063dSJacob Faibussowitsch PetscCall(TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_")); 10009566063dSJacob Faibussowitsch PetscCall(TaoSetSolution(mfqP->subtao, mfqP->subx)); 10019566063dSJacob Faibussowitsch PetscCall(TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP)); 10029566063dSJacob Faibussowitsch PetscCall(TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits)); 10039566063dSJacob Faibussowitsch PetscCall(TaoSetFromOptions(mfqP->subtao)); 10049566063dSJacob Faibussowitsch PetscCall(TaoGetKSP(mfqP->subtao, &ksp)); 1005a7e14dcfSSatish Balay if (ksp) { 10069566063dSJacob Faibussowitsch PetscCall(KSPGetPC(ksp, &pc)); 10079566063dSJacob Faibussowitsch PetscCall(PCSetType(pc, PCNONE)); 1008a7e14dcfSSatish Balay } 10099566063dSJacob Faibussowitsch PetscCall(TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu)); 10109566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH)); 10119566063dSJacob Faibussowitsch PetscCall(TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP)); 1012a7e14dcfSSatish Balay } 1013a7e14dcfSSatish Balay PetscFunctionReturn(0); 1014a7e14dcfSSatish Balay } 1015a7e14dcfSSatish Balay 1016*9371c9d4SSatish Balay static PetscErrorCode TaoDestroy_POUNDERS(Tao tao) { 1017a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1018a7e14dcfSSatish Balay PetscInt i; 1019a7e14dcfSSatish Balay 1020a7e14dcfSSatish Balay PetscFunctionBegin; 1021a7e14dcfSSatish Balay if (!mfqP->usegqt) { 10229566063dSJacob Faibussowitsch PetscCall(TaoDestroy(&mfqP->subtao)); 10239566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subx)); 10249566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subxl)); 10259566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subxu)); 10269566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subb)); 10279566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subpdel)); 10289566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subndel)); 10299566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mfqP->subH)); 1030a7e14dcfSSatish Balay } 10319566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fres)); 10329566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->RES)); 10339566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work)); 10349566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work2)); 10359566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work3)); 10369566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->mwork)); 10379566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->omega)); 10389566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->beta)); 10399566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->alpha)); 10409566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->H)); 10419566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Q)); 10429566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Q_tmp)); 10439566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L)); 10449566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L_tmp)); 10459566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L_save)); 10469566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->N)); 10479566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->M)); 10489566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Z)); 10499566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->tau)); 10509566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->tau_tmp)); 10519566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->npmaxwork)); 10529566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->npmaxiwork)); 10539566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->xmin)); 10549566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->C)); 10559566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fdiff)); 10569566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Disp)); 10579566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gres)); 10589566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Hres)); 10599566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gpoints)); 10609566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->model_indices)); 10619566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->last_model_indices)); 10629566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Xsubproblem)); 10639566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gdel)); 10649566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Hdel)); 10659566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->indices)); 10669566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->iwork)); 10679566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->w)); 1068a7e14dcfSSatish Balay for (i = 0; i < mfqP->nHist; i++) { 10699566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->Xhist[i])); 10709566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->Fhist[i])); 1071a7e14dcfSSatish Balay } 10729566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->workxvec)); 10739566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->workfvec)); 10749566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Xhist)); 10759566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fhist)); 1076a7e14dcfSSatish Balay 1077f40bd260SBarry Smith if (mfqP->size > 1) { 10789566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localx)); 10799566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localxmin)); 10809566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localf)); 10819566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localfmin)); 1082a7e14dcfSSatish Balay } 10839566063dSJacob Faibussowitsch PetscCall(PetscFree(tao->data)); 1084a7e14dcfSSatish Balay PetscFunctionReturn(0); 1085a7e14dcfSSatish Balay } 1086a7e14dcfSSatish Balay 1087*9371c9d4SSatish Balay static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems *PetscOptionsObject) { 1088a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1089a7e14dcfSSatish Balay 1090a7e14dcfSSatish Balay PetscFunctionBegin; 1091d0609cedSBarry Smith PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization"); 10929566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL)); 1093ff2b74efSJason Sarich mfqP->delta = mfqP->delta0; 10949566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL)); 10959566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL)); 1096d0609cedSBarry Smith PetscOptionsHeadEnd(); 1097a7e14dcfSSatish Balay PetscFunctionReturn(0); 1098a7e14dcfSSatish Balay } 1099a7e14dcfSSatish Balay 1100*9371c9d4SSatish Balay static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer) { 1101a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1102a7e14dcfSSatish Balay PetscBool isascii; 1103a7e14dcfSSatish Balay 1104a7e14dcfSSatish Balay PetscFunctionBegin; 11059566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1106a7e14dcfSSatish Balay if (isascii) { 11079566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0)); 11089566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta)); 110963a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints)); 1110a7e14dcfSSatish Balay if (mfqP->usegqt) { 11119566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n")); 1112a7e14dcfSSatish Balay } else { 11139566063dSJacob Faibussowitsch PetscCall(TaoView(mfqP->subtao, viewer)); 1114a7e14dcfSSatish Balay } 1115a7e14dcfSSatish Balay } 1116a7e14dcfSSatish Balay PetscFunctionReturn(0); 1117a7e14dcfSSatish Balay } 11181eb8069cSJason Sarich /*MC 11191eb8069cSJason Sarich TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares 11201eb8069cSJason Sarich 11211eb8069cSJason Sarich Options Database Keys: 11221eb8069cSJason Sarich + -tao_pounders_delta - initial step length 11231eb8069cSJason Sarich . -tao_pounders_npmax - maximum number of points in model 11241eb8069cSJason Sarich - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON 11251eb8069cSJason Sarich 11261eb8069cSJason Sarich Level: beginner 11271eb8069cSJason Sarich 11281eb8069cSJason Sarich M*/ 1129a7e14dcfSSatish Balay 1130*9371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao) { 1131a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1132a7e14dcfSSatish Balay 1133a7e14dcfSSatish Balay PetscFunctionBegin; 1134a7e14dcfSSatish Balay tao->ops->setup = TaoSetUp_POUNDERS; 1135a7e14dcfSSatish Balay tao->ops->solve = TaoSolve_POUNDERS; 1136a7e14dcfSSatish Balay tao->ops->view = TaoView_POUNDERS; 1137a7e14dcfSSatish Balay tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS; 1138a7e14dcfSSatish Balay tao->ops->destroy = TaoDestroy_POUNDERS; 1139a7e14dcfSSatish Balay 11409566063dSJacob Faibussowitsch PetscCall(PetscNewLog(tao, &mfqP)); 1141a7e14dcfSSatish Balay tao->data = (void *)mfqP; 11426552cf8aSJason Sarich /* Override default settings (unless already changed) */ 11436552cf8aSJason Sarich if (!tao->max_it_changed) tao->max_it = 2000; 11446552cf8aSJason Sarich if (!tao->max_funcs_changed) tao->max_funcs = 4000; 1145f553c655STodd Munson mfqP->npmax = PETSC_DEFAULT; 1146ff2b74efSJason Sarich mfqP->delta0 = 0.1; 1147a7e14dcfSSatish Balay mfqP->delta = 0.1; 1148a7e14dcfSSatish Balay mfqP->deltamax = 1e3; 1149125ddc8aSJason Sarich mfqP->deltamin = 1e-6; 1150a8bd9cb3STodd Munson mfqP->c2 = 10.0; 1151a7e14dcfSSatish Balay mfqP->theta1 = 1e-5; 1152a7e14dcfSSatish Balay mfqP->theta2 = 1e-4; 1153a7e14dcfSSatish Balay mfqP->gamma0 = 0.5; 1154a7e14dcfSSatish Balay mfqP->gamma1 = 2.0; 1155a7e14dcfSSatish Balay mfqP->eta0 = 0.0; 1156a7e14dcfSSatish Balay mfqP->eta1 = 0.1; 1157f553c655STodd Munson mfqP->usegqt = PETSC_FALSE; 1158a7e14dcfSSatish Balay mfqP->gqt_rtol = 0.001; 1159a7e14dcfSSatish Balay mfqP->gqt_maxits = 50; 116083c8fe1dSLisandro Dalcin mfqP->workxvec = NULL; 1161a7e14dcfSSatish Balay PetscFunctionReturn(0); 1162a7e14dcfSSatish Balay } 1163