1aaa7dc30SBarry Smith #include <../src/tao/leastsquares/impls/pounders/pounders.h> 2a7e14dcfSSatish Balay 3*d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, void *ctx) 4*d71ae5a4SJacob Faibussowitsch { 5a7e14dcfSSatish Balay PetscFunctionBegin; 6a7e14dcfSSatish Balay PetscFunctionReturn(0); 7a7e14dcfSSatish Balay } 8ca88a2e0STodd Munson 9*d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, void *ctx) 10*d71ae5a4SJacob Faibussowitsch { 11a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx; 12a7e14dcfSSatish Balay PetscReal d1, d2; 13f06e3bfaSBarry Smith 14a7e14dcfSSatish Balay PetscFunctionBegin; 15a7e14dcfSSatish Balay /* g = A*x (add b later)*/ 169566063dSJacob Faibussowitsch PetscCall(MatMult(mfqP->subH, x, g)); 17a7e14dcfSSatish Balay 18a7e14dcfSSatish Balay /* f = 1/2 * x'*(Ax) + b'*x */ 199566063dSJacob Faibussowitsch PetscCall(VecDot(x, g, &d1)); 209566063dSJacob Faibussowitsch PetscCall(VecDot(mfqP->subb, x, &d2)); 21a7e14dcfSSatish Balay *f = 0.5 * d1 + d2; 22a7e14dcfSSatish Balay 23a7e14dcfSSatish Balay /* now g = g + b */ 249566063dSJacob Faibussowitsch PetscCall(VecAXPY(g, 1.0, mfqP->subb)); 25a7e14dcfSSatish Balay PetscFunctionReturn(0); 26a7e14dcfSSatish Balay } 27ca88a2e0STodd Munson 28*d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum) 29*d71ae5a4SJacob Faibussowitsch { 30125ddc8aSJason Sarich TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 31125ddc8aSJason Sarich PetscInt i, row, col; 32125ddc8aSJason Sarich PetscReal fr, fc; 33691b26d3SBarry Smith 34125ddc8aSJason Sarich PetscFunctionBegin; 359566063dSJacob Faibussowitsch PetscCall(TaoComputeResidual(tao, x, F)); 36737f463aSAlp Dener if (tao->res_weights_v) { 379566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F)); 389566063dSJacob Faibussowitsch PetscCall(VecDot(mfqP->workfvec, mfqP->workfvec, fsum)); 39737f463aSAlp Dener } else if (tao->res_weights_w) { 40125ddc8aSJason Sarich *fsum = 0; 41737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 42737f463aSAlp Dener row = tao->res_weights_rows[i]; 43737f463aSAlp Dener col = tao->res_weights_cols[i]; 449566063dSJacob Faibussowitsch PetscCall(VecGetValues(F, 1, &row, &fr)); 459566063dSJacob Faibussowitsch PetscCall(VecGetValues(F, 1, &col, &fc)); 46737f463aSAlp Dener *fsum += tao->res_weights_w[i] * fc * fr; 47125ddc8aSJason Sarich } 48125ddc8aSJason Sarich } else { 499566063dSJacob Faibussowitsch PetscCall(VecDot(F, F, fsum)); 50125ddc8aSJason Sarich } 519566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum)); 523c859ba3SBarry Smith PetscCheck(!PetscIsInfOrNanReal(*fsum), PETSC_COMM_SELF, PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 53125ddc8aSJason Sarich PetscFunctionReturn(0); 54125ddc8aSJason Sarich } 55f06e3bfaSBarry Smith 56*d71ae5a4SJacob Faibussowitsch static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin) 57*d71ae5a4SJacob Faibussowitsch { 586f4723b1SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 596f4723b1SBarry Smith PetscReal atol = 1.0e-5; 606f4723b1SBarry Smith #else 61a7e14dcfSSatish Balay PetscReal atol = 1.0e-10; 626f4723b1SBarry Smith #endif 63a7e14dcfSSatish Balay PetscInt info, its; 64a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 65a7e14dcfSSatish Balay 66a7e14dcfSSatish Balay PetscFunctionBegin; 67a7e14dcfSSatish Balay if (!mfqP->usegqt) { 68a7e14dcfSSatish Balay PetscReal maxval; 69a7e14dcfSSatish Balay PetscInt i, j; 70a7e14dcfSSatish Balay 719566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 729566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->subb)); 739566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->subb)); 74a7e14dcfSSatish Balay 759566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subx, 0.0)); 76a7e14dcfSSatish Balay 779566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subndel, -1.0)); 789566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subpdel, +1.0)); 79a7e14dcfSSatish Balay 80ce902467SBarry Smith /* Complete the lower triangle of the Hessian matrix */ 81a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 82ad540459SPierre Jolivet for (j = i + 1; j < mfqP->n; j++) mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i]; 83a7e14dcfSSatish Balay } 849566063dSJacob Faibussowitsch PetscCall(MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES)); 859566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY)); 869566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY)); 87a7e14dcfSSatish Balay 889566063dSJacob Faibussowitsch PetscCall(TaoResetStatistics(mfqP->subtao)); 899566063dSJacob Faibussowitsch /* PetscCall(TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_DEFAULT)); */ 90a7e14dcfSSatish Balay /* enforce bound constraints -- experimental */ 91a7e14dcfSSatish Balay if (tao->XU && tao->XL) { 929566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XU, mfqP->subxu)); 939566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subxu, -1.0, tao->solution)); 949566063dSJacob Faibussowitsch PetscCall(VecScale(mfqP->subxu, 1.0 / mfqP->delta)); 959566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XL, mfqP->subxl)); 969566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subxl, -1.0, tao->solution)); 979566063dSJacob Faibussowitsch PetscCall(VecScale(mfqP->subxl, 1.0 / mfqP->delta)); 98a7e14dcfSSatish Balay 999566063dSJacob Faibussowitsch PetscCall(VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel)); 1009566063dSJacob Faibussowitsch PetscCall(VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel)); 101a7e14dcfSSatish Balay } else { 1029566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subpdel, mfqP->subxu)); 1039566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subndel, mfqP->subxl)); 104a7e14dcfSSatish Balay } 105a7e14dcfSSatish Balay /* Make sure xu > xl */ 1069566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1079566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1089566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1093c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "upper bound < lower bound in subproblem"); 110a7e14dcfSSatish Balay /* Make sure xu > tao->solution > xl */ 1119566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1129566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 1139566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1143c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess < lower bound in subproblem"); 115a7e14dcfSSatish Balay 1169566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 1179566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1189566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1193c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess > upper bound in subproblem"); 120a7e14dcfSSatish Balay 1219566063dSJacob Faibussowitsch PetscCall(TaoSolve(mfqP->subtao)); 1229566063dSJacob Faibussowitsch PetscCall(TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL)); 123a7e14dcfSSatish Balay 124a7e14dcfSSatish Balay /* test bounds post-solution*/ 1259566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1269566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 1279566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 128a7e14dcfSSatish Balay if (maxval > 1e-5) { 1299566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "subproblem solution < lower bound\n")); 130a7e14dcfSSatish Balay tao->reason = TAO_DIVERGED_TR_REDUCTION; 131a7e14dcfSSatish Balay } 132a7e14dcfSSatish Balay 1339566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 1349566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1359566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 136a7e14dcfSSatish Balay if (maxval > 1e-5) { 1379566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "subproblem solution > upper bound\n")); 138a7e14dcfSSatish Balay tao->reason = TAO_DIVERGED_TR_REDUCTION; 139a7e14dcfSSatish Balay } 140a7e14dcfSSatish Balay } else { 141f06e3bfaSBarry Smith gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3); 142a7e14dcfSSatish Balay } 143a7e14dcfSSatish Balay *qmin *= -1; 144a7e14dcfSSatish Balay PetscFunctionReturn(0); 145a7e14dcfSSatish Balay } 146a7e14dcfSSatish Balay 147*d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_update_res(Tao tao) 148*d71ae5a4SJacob Faibussowitsch { 149125ddc8aSJason Sarich TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 150125ddc8aSJason Sarich PetscInt i, row, col; 151125ddc8aSJason Sarich PetscBLASInt blasn = mfqP->n, blasn2 = blasn * blasn, blasm = mfqP->m, ione = 1; 1526c95174dSJason Sarich PetscReal zero = 0.0, one = 1.0, wii, factor; 153125ddc8aSJason Sarich 154691b26d3SBarry Smith PetscFunctionBegin; 155ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] = 0; 156ad540459SPierre Jolivet for (i = 0; i < mfqP->n * mfqP->n; i++) mfqP->Hres[i] = 0; 157125ddc8aSJason Sarich 158125ddc8aSJason Sarich /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */ 159737f463aSAlp Dener if (tao->res_weights_v) { 1606c95174dSJason Sarich /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */ 161125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 1629566063dSJacob Faibussowitsch PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &factor)); 1636c95174dSJason Sarich factor = factor * mfqP->C[i]; 164792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione)); 165125ddc8aSJason Sarich } 166125ddc8aSJason Sarich 167125ddc8aSJason Sarich /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 1686c95174dSJason Sarich /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/ 169125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 1709566063dSJacob Faibussowitsch PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &wii)); 171125ddc8aSJason Sarich if (tao->niter > 1) { 172125ddc8aSJason Sarich factor = wii * mfqP->C[i]; 1736c95174dSJason Sarich /* add wii * ci * Hi */ 174792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 175125ddc8aSJason Sarich } 1766c95174dSJason Sarich /* add wii * gi * gi' */ 177792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn)); 178125ddc8aSJason Sarich } 179737f463aSAlp Dener } else if (tao->res_weights_w) { 180ce902467SBarry Smith /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */ 181737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 182737f463aSAlp Dener row = tao->res_weights_rows[i]; 183737f463aSAlp Dener col = tao->res_weights_cols[i]; 184ce902467SBarry Smith 185737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 186792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione)); 187737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 188792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione)); 189ce902467SBarry Smith } 190ce902467SBarry Smith 191ce902467SBarry Smith /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 1926c95174dSJason Sarich /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 193737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 194737f463aSAlp Dener row = tao->res_weights_rows[i]; 195737f463aSAlp Dener col = tao->res_weights_cols[i]; 196737f463aSAlp Dener factor = tao->res_weights_w[i] / 2.0; 197125ddc8aSJason Sarich /* add wij * gi gj' + wij * gj gi' */ 198792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn)); 199792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn)); 200125ddc8aSJason Sarich } 201125ddc8aSJason Sarich if (tao->niter > 1) { 202737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 203737f463aSAlp Dener row = tao->res_weights_rows[i]; 204737f463aSAlp Dener col = tao->res_weights_cols[i]; 205125ddc8aSJason Sarich 206125ddc8aSJason Sarich /* add wij*cj*Hi */ 207737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 208792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione)); 209125ddc8aSJason Sarich 210125ddc8aSJason Sarich /* add wij*ci*Hj */ 211737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 212792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione)); 213125ddc8aSJason Sarich } 214125ddc8aSJason Sarich } 215125ddc8aSJason Sarich } else { 216ce902467SBarry Smith /* Default: Gres= sum_i[cigi] = G*c' */ 2179566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Identity weights\n")); 218792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione)); 219ce902467SBarry Smith 220ce902467SBarry Smith /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 2216c95174dSJason Sarich /* Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)} */ 222792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn)); 223125ddc8aSJason Sarich 224125ddc8aSJason Sarich /* sum(F(xkin,i)*H(:,:,i)) */ 225125ddc8aSJason Sarich if (tao->niter > 1) { 226125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 2276c95174dSJason Sarich factor = mfqP->C[i]; 228792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 229125ddc8aSJason Sarich } 230125ddc8aSJason Sarich } 231125ddc8aSJason Sarich } 232125ddc8aSJason Sarich PetscFunctionReturn(0); 233125ddc8aSJason Sarich } 234ca88a2e0STodd Munson 235*d71ae5a4SJacob Faibussowitsch static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi) 236*d71ae5a4SJacob Faibussowitsch { 237a7e14dcfSSatish Balay /* Phi = .5*[x(1)^2 sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */ 238a7e14dcfSSatish Balay PetscInt i, j, k; 239a7e14dcfSSatish Balay PetscReal sqrt2 = PetscSqrtReal(2.0); 240f06e3bfaSBarry Smith 241a7e14dcfSSatish Balay PetscFunctionBegin; 242a7e14dcfSSatish Balay j = 0; 243a7e14dcfSSatish Balay for (i = 0; i < n; i++) { 244a7e14dcfSSatish Balay phi[j] = 0.5 * x[i] * x[i]; 245a7e14dcfSSatish Balay j++; 246a7e14dcfSSatish Balay for (k = i + 1; k < n; k++) { 247a7e14dcfSSatish Balay phi[j] = x[i] * x[k] / sqrt2; 248a7e14dcfSSatish Balay j++; 249a7e14dcfSSatish Balay } 250a7e14dcfSSatish Balay } 251a7e14dcfSSatish Balay PetscFunctionReturn(0); 252a7e14dcfSSatish Balay } 253a7e14dcfSSatish Balay 254*d71ae5a4SJacob Faibussowitsch static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP) 255*d71ae5a4SJacob Faibussowitsch { 256a7e14dcfSSatish Balay /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x' 257a7e14dcfSSatish Balay that satisfies the interpolation conditions Q(X[:,j]) = f(j) 258a7e14dcfSSatish Balay for j=1,...,m and with a Hessian matrix of least Frobenius norm */ 259a7e14dcfSSatish Balay 260a7e14dcfSSatish Balay /* NB --we are ignoring c */ 261a7e14dcfSSatish Balay PetscInt i, j, k, num, np = mfqP->nmodelpoints; 262a7e14dcfSSatish Balay PetscReal one = 1.0, zero = 0.0, negone = -1.0; 263a7e14dcfSSatish Balay PetscBLASInt blasnpmax = mfqP->npmax; 264a7e14dcfSSatish Balay PetscBLASInt blasnplus1 = mfqP->n + 1; 265a7e14dcfSSatish Balay PetscBLASInt blasnp = np; 266a7e14dcfSSatish Balay PetscBLASInt blasint = mfqP->n * (mfqP->n + 1) / 2; 267a7e14dcfSSatish Balay PetscBLASInt blasint2 = np - mfqP->n - 1; 268f06e3bfaSBarry Smith PetscBLASInt info, ione = 1; 269a7e14dcfSSatish Balay PetscReal sqrt2 = PetscSqrtReal(2.0); 270a7e14dcfSSatish Balay 271a7e14dcfSSatish Balay PetscFunctionBegin; 272ad540459SPierre Jolivet for (i = 0; i < mfqP->n * mfqP->m; i++) mfqP->Gdel[i] = 0; 273ad540459SPierre Jolivet for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) mfqP->Hdel[i] = 0; 274a7e14dcfSSatish Balay 275a7e14dcfSSatish Balay /* factor M */ 276792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info)); 27763a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrf returned with value %" PetscBLASInt_FMT, info); 278a7e14dcfSSatish Balay 279a7e14dcfSSatish Balay if (np == mfqP->n + 1) { 280ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) mfqP->omega[i] = 0.0; 281ad540459SPierre Jolivet for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->beta[i] = 0.0; 282a7e14dcfSSatish Balay } else { 283a7e14dcfSSatish Balay /* Let Ltmp = (L'*L) */ 284792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint)); 285a7e14dcfSSatish Balay 286a7e14dcfSSatish Balay /* factor Ltmp */ 287792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info)); 28863a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrf returned with value %" PetscBLASInt_FMT, info); 289a7e14dcfSSatish Balay } 290a7e14dcfSSatish Balay 291a7e14dcfSSatish Balay for (k = 0; k < mfqP->m; k++) { 292a7e14dcfSSatish Balay if (np != mfqP->n + 1) { 293a7e14dcfSSatish Balay /* Solve L'*L*Omega = Z' * RESk*/ 294792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione)); 295792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info)); 29663a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrs returned with value %" PetscBLASInt_FMT, info); 297a7e14dcfSSatish Balay 298a7e14dcfSSatish Balay /* Beta = L*Omega */ 299792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione)); 300a7e14dcfSSatish Balay } 301a7e14dcfSSatish Balay 302a7e14dcfSSatish Balay /* solve M'*Alpha = RESk - N'*Beta */ 303792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione)); 304792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info)); 30563a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrs returned with value %" PetscBLASInt_FMT, info); 306a7e14dcfSSatish Balay 307a7e14dcfSSatish Balay /* Gdel(:,k) = Alpha(2:n+1) */ 308ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1]; 309a7e14dcfSSatish Balay 310a7e14dcfSSatish Balay /* Set Hdels */ 311a7e14dcfSSatish Balay num = 0; 312a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 313a7e14dcfSSatish Balay /* H[i,i,k] = Beta(num) */ 314a7e14dcfSSatish Balay mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num]; 315a7e14dcfSSatish Balay num++; 316a7e14dcfSSatish Balay for (j = i + 1; j < mfqP->n; j++) { 317a7e14dcfSSatish Balay /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */ 318a7e14dcfSSatish Balay mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 319a7e14dcfSSatish Balay mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 320a7e14dcfSSatish Balay num++; 321a7e14dcfSSatish Balay } 322a7e14dcfSSatish Balay } 323a7e14dcfSSatish Balay } 324a7e14dcfSSatish Balay PetscFunctionReturn(0); 325a7e14dcfSSatish Balay } 326a7e14dcfSSatish Balay 327*d71ae5a4SJacob Faibussowitsch static PetscErrorCode morepoints(TAO_POUNDERS *mfqP) 328*d71ae5a4SJacob Faibussowitsch { 329a7e14dcfSSatish Balay /* Assumes mfqP->model_indices[0] is minimum index 330a7e14dcfSSatish Balay Finishes adding points to mfqP->model_indices (up to npmax) 331a7e14dcfSSatish Balay Computes L,Z,M,N 332a7e14dcfSSatish Balay np is actual number of points in model (should equal npmax?) */ 333a7e14dcfSSatish Balay PetscInt point, i, j, offset; 334a7e14dcfSSatish Balay PetscInt reject; 335f06e3bfaSBarry Smith PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blasnplus1 = mfqP->n + 1, info, blasnmax = mfqP->nmax, blasint, blasint2, blasnp, blasmaxmn; 3365e081366SBarry Smith const PetscReal *x; 3375e081366SBarry Smith PetscReal normd; 338a7e14dcfSSatish Balay 339f06e3bfaSBarry Smith PetscFunctionBegin; 340a7e14dcfSSatish Balay /* Initialize M,N */ 341a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 3429566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 343a7e14dcfSSatish Balay mfqP->M[(mfqP->n + 1) * i] = 1.0; 344ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 3459566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 3469566063dSJacob Faibussowitsch PetscCall(phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i])); 347a7e14dcfSSatish Balay } 348a7e14dcfSSatish Balay 349a7e14dcfSSatish Balay /* Now we add points until we have npmax starting with the most recent ones */ 350a7e14dcfSSatish Balay point = mfqP->nHist - 1; 351a7e14dcfSSatish Balay mfqP->nmodelpoints = mfqP->n + 1; 352a7e14dcfSSatish Balay while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) { 353a7e14dcfSSatish Balay /* Reject any points already in the model */ 354a7e14dcfSSatish Balay reject = 0; 355a7e14dcfSSatish Balay for (j = 0; j < mfqP->n + 1; j++) { 356a7e14dcfSSatish Balay if (point == mfqP->model_indices[j]) { 357a7e14dcfSSatish Balay reject = 1; 358a7e14dcfSSatish Balay break; 359a7e14dcfSSatish Balay } 360a7e14dcfSSatish Balay } 361a7e14dcfSSatish Balay 362a7e14dcfSSatish Balay /* Reject if norm(d) >c2 */ 363a7e14dcfSSatish Balay if (!reject) { 3649566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[point], mfqP->workxvec)); 3659566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex])); 3669566063dSJacob Faibussowitsch PetscCall(VecNorm(mfqP->workxvec, NORM_2, &normd)); 367a7e14dcfSSatish Balay normd /= mfqP->delta; 368ad540459SPierre Jolivet if (normd > mfqP->c2) reject = 1; 369a7e14dcfSSatish Balay } 370f06e3bfaSBarry Smith if (reject) { 371a7e14dcfSSatish Balay point--; 372a7e14dcfSSatish Balay continue; 373a7e14dcfSSatish Balay } 374a7e14dcfSSatish Balay 3759566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[point], &x)); 376a7e14dcfSSatish Balay mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0; 377ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 3789566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[point], &x)); 3799566063dSJacob Faibussowitsch PetscCall(phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)])); 380a7e14dcfSSatish Balay 381a7e14dcfSSatish Balay /* Update QR factorization */ 382a7e14dcfSSatish Balay /* Copy M' to Q_tmp */ 383a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 384ad540459SPierre Jolivet for (j = 0; j < mfqP->npmax; j++) mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j]; 385a7e14dcfSSatish Balay } 386a7e14dcfSSatish Balay blasnp = mfqP->nmodelpoints + 1; 387a7e14dcfSSatish Balay /* Q_tmp,R = qr(M') */ 388a7e14dcfSSatish Balay blasmaxmn = PetscMax(mfqP->m, mfqP->n + 1); 389792fecdfSBarry Smith PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info)); 39063a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine geqrf returned with value %" PetscBLASInt_FMT, info); 391a7e14dcfSSatish Balay 392a7e14dcfSSatish Balay /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */ 393a7e14dcfSSatish Balay /* L = N*Qtmp */ 394a7e14dcfSSatish Balay blasint2 = mfqP->n * (mfqP->n + 1) / 2; 395a7e14dcfSSatish Balay /* Copy N to L_tmp */ 396ad540459SPierre Jolivet for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) mfqP->L_tmp[i] = mfqP->N[i]; 397a7e14dcfSSatish Balay /* Copy L_save to L_tmp */ 398a7e14dcfSSatish Balay 399a7e14dcfSSatish Balay /* L_tmp = N*Qtmp' */ 400792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info)); 40163a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 402a7e14dcfSSatish Balay 403a7e14dcfSSatish Balay /* Copy L_tmp to L_save */ 404ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L_save[i] = mfqP->L_tmp[i]; 405a7e14dcfSSatish Balay 406a7e14dcfSSatish Balay /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */ 407a7e14dcfSSatish Balay blasint = mfqP->nmodelpoints - mfqP->n; 4089566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 409792fecdfSBarry Smith PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info)); 4109566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 41163a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine gesvd returned with value %" PetscBLASInt_FMT, info); 412a7e14dcfSSatish Balay 413a7e14dcfSSatish Balay if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) { 414a7e14dcfSSatish Balay /* accept point */ 415a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = point; 416a7e14dcfSSatish Balay /* Copy Q_tmp to Q */ 417ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q[i] = mfqP->Q_tmp[i]; 418ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax; i++) mfqP->tau[i] = mfqP->tau_tmp[i]; 419a7e14dcfSSatish Balay mfqP->nmodelpoints++; 4201f4a0ff5SJason Sarich blasnp = mfqP->nmodelpoints; 421a7e14dcfSSatish Balay 422a7e14dcfSSatish Balay /* Copy L_save to L */ 423ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = mfqP->L_save[i]; 424a7e14dcfSSatish Balay } 425a7e14dcfSSatish Balay point--; 426a7e14dcfSSatish Balay } 427a7e14dcfSSatish Balay 428a7e14dcfSSatish Balay blasnp = mfqP->nmodelpoints; 429a7e14dcfSSatish Balay /* Copy Q(:,n+2:np) to Z */ 430a7e14dcfSSatish Balay /* First set Q_tmp to I */ 431ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q_tmp[i] = 0.0; 432ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax; i++) mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0; 433a7e14dcfSSatish Balay 434a7e14dcfSSatish Balay /* Q_tmp = I * Q */ 435792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 43663a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 437a7e14dcfSSatish Balay 438a7e14dcfSSatish Balay /* Copy Q_tmp(:,n+2:np) to Z) */ 439a7e14dcfSSatish Balay offset = mfqP->npmax * (mfqP->n + 1); 440ad540459SPierre Jolivet for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Z[i - offset] = mfqP->Q_tmp[i]; 441a7e14dcfSSatish Balay 442a7e14dcfSSatish Balay if (mfqP->nmodelpoints == mfqP->n + 1) { 443a7e14dcfSSatish Balay /* Set L to I_{n+1} */ 444ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = 0.0; 445ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0; 446a7e14dcfSSatish Balay } 447a7e14dcfSSatish Balay PetscFunctionReturn(0); 448a7e14dcfSSatish Balay } 449a7e14dcfSSatish Balay 450a7e14dcfSSatish Balay /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */ 451*d71ae5a4SJacob Faibussowitsch static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index) 452*d71ae5a4SJacob Faibussowitsch { 453a7e14dcfSSatish Balay PetscFunctionBegin; 454a7e14dcfSSatish Balay /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/ 4559566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist])); 4569566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES)); 4579566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 4589566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 4599566063dSJacob Faibussowitsch PetscCall(VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex])); 460a7e14dcfSSatish Balay 461a7e14dcfSSatish Balay /* Project into feasible region */ 46248a46eb9SPierre Jolivet if (tao->XU && tao->XL) PetscCall(VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist])); 463a7e14dcfSSatish Balay 464a7e14dcfSSatish Balay /* Compute value of new vector */ 4659566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist])); 466a7e14dcfSSatish Balay CHKMEMQ; 4679566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 468a7e14dcfSSatish Balay 469a7e14dcfSSatish Balay /* Add new vector to model */ 470a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist; 471a7e14dcfSSatish Balay mfqP->nmodelpoints++; 472a7e14dcfSSatish Balay mfqP->nHist++; 473a7e14dcfSSatish Balay PetscFunctionReturn(0); 474a7e14dcfSSatish Balay } 475a7e14dcfSSatish Balay 476*d71ae5a4SJacob Faibussowitsch static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints) 477*d71ae5a4SJacob Faibussowitsch { 478a7e14dcfSSatish Balay /* modeld = Q(:,np+1:n)' */ 479a7e14dcfSSatish Balay PetscInt i, j, minindex = 0; 480e270355aSBarry Smith PetscReal dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY; 481a7e14dcfSSatish Balay PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blask, info; 482a7e14dcfSSatish Balay PetscBLASInt blas1 = 1, blasnmax = mfqP->nmax; 483f06e3bfaSBarry Smith 484362febeeSStefano Zampini PetscFunctionBegin; 485a7e14dcfSSatish Balay blask = mfqP->nmodelpoints; 486a7e14dcfSSatish Balay /* Qtmp = I(n x n) */ 487a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 488ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0; 489a7e14dcfSSatish Balay } 490ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0; 491a7e14dcfSSatish Balay 492a7e14dcfSSatish Balay /* Qtmp = Q * I */ 493792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 494a7e14dcfSSatish Balay 495a7e14dcfSSatish Balay for (i = mfqP->nmodelpoints; i < mfqP->n; i++) { 496792fecdfSBarry Smith PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1)); 497a7e14dcfSSatish Balay if (dp > 0.0) { /* Model says use the other direction! */ 498ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i * mfqP->npmax + j] *= -1; 499a7e14dcfSSatish Balay } 500a7e14dcfSSatish Balay /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */ 501ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->work2[j] = mfqP->Gres[j]; 502792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1)); 503792fecdfSBarry Smith PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1)); 504a7e14dcfSSatish Balay if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) { 505a7e14dcfSSatish Balay minindex = i; 506a7e14dcfSSatish Balay minvalue = mfqP->work[i]; 507a7e14dcfSSatish Balay } 50848a46eb9SPierre Jolivet if (addallpoints != 0) PetscCall(addpoint(tao, mfqP, i)); 509a7e14dcfSSatish Balay } 51048a46eb9SPierre Jolivet if (!addallpoints) PetscCall(addpoint(tao, mfqP, minindex)); 511a7e14dcfSSatish Balay PetscFunctionReturn(0); 512a7e14dcfSSatish Balay } 513a7e14dcfSSatish Balay 514*d71ae5a4SJacob Faibussowitsch static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c) 515*d71ae5a4SJacob Faibussowitsch { 516a7e14dcfSSatish Balay PetscInt i, j; 517a7e14dcfSSatish Balay PetscBLASInt blasm = mfqP->m, blasj, blask, blasn = mfqP->n, ione = 1, info; 518a7e14dcfSSatish Balay PetscBLASInt blasnpmax = mfqP->npmax, blasmaxmn; 519a7e14dcfSSatish Balay PetscReal proj, normd; 5205e081366SBarry Smith const PetscReal *x; 521a7e14dcfSSatish Balay 522f06e3bfaSBarry Smith PetscFunctionBegin; 523a7e14dcfSSatish Balay for (i = mfqP->nHist - 1; i >= 0; i--) { 5249566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[i], &x)); 525ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta; 5269566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[i], &x)); 527792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione)); 528792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione)); 529a8bd9cb3STodd Munson if (normd <= c) { 53029c6e7daSBarry Smith blasj = PetscMax((mfqP->n - mfqP->nmodelpoints), 0); 531a7e14dcfSSatish Balay if (!mfqP->q_is_I) { 532a7e14dcfSSatish Balay /* project D onto null */ 533a7e14dcfSSatish Balay blask = (mfqP->nmodelpoints); 534792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info)); 53563a3b9bcSJacob Faibussowitsch PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "ormqr returned value %" PetscBLASInt_FMT, info); 536a7e14dcfSSatish Balay } 537792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione)); 538a7e14dcfSSatish Balay 539a7e14dcfSSatish Balay if (proj >= mfqP->theta1) { /* add this index to model */ 540a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = i; 541a7e14dcfSSatish Balay mfqP->nmodelpoints++; 542792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione)); 543a7e14dcfSSatish Balay blask = mfqP->npmax * (mfqP->nmodelpoints); 544792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione)); 545a7e14dcfSSatish Balay blask = mfqP->nmodelpoints; 546a7e14dcfSSatish Balay blasmaxmn = PetscMax(mfqP->m, mfqP->n); 547792fecdfSBarry Smith PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info)); 54863a3b9bcSJacob Faibussowitsch PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "geqrf returned value %" PetscBLASInt_FMT, info); 549a7e14dcfSSatish Balay mfqP->q_is_I = 0; 550a7e14dcfSSatish Balay } 551ad540459SPierre Jolivet if (mfqP->nmodelpoints == mfqP->n) break; 552a7e14dcfSSatish Balay } 553a7e14dcfSSatish Balay } 554babe810cSJason Sarich 555a7e14dcfSSatish Balay PetscFunctionReturn(0); 556a7e14dcfSSatish Balay } 557f06e3bfaSBarry Smith 558*d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoSolve_POUNDERS(Tao tao) 559*d71ae5a4SJacob Faibussowitsch { 560a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 5618931d482SJason Sarich PetscInt i, ii, j, k, l; 562a7e14dcfSSatish Balay PetscReal step = 1.0; 563a7e14dcfSSatish Balay PetscInt low, high; 564a7e14dcfSSatish Balay PetscReal minnorm; 5655e081366SBarry Smith PetscReal *x, *f; 5665e081366SBarry Smith const PetscReal *xmint, *fmin; 567bd026e97SJed Brown PetscReal deltaold; 568a7e14dcfSSatish Balay PetscReal gnorm; 569a7e14dcfSSatish Balay PetscBLASInt info, ione = 1, iblas; 570ff2b74efSJason Sarich PetscBool valid, same; 571a7e14dcfSSatish Balay PetscReal mdec, rho, normxsp; 572a7e14dcfSSatish Balay PetscReal one = 1.0, zero = 0.0, ratio; 573125ddc8aSJason Sarich PetscBLASInt blasm, blasn, blasncopy, blasnpmax; 574a7bbbda8SBarry Smith static PetscBool set = PETSC_FALSE; 575a7e14dcfSSatish Balay 576a7e14dcfSSatish Balay /* n = # of parameters 577a7e14dcfSSatish Balay m = dimension (components) of function */ 578a7e14dcfSSatish Balay PetscFunctionBegin; 5799566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister("@article{UNEDF0,\n" 580a7bbbda8SBarry Smith "title = {Nuclear energy density optimization},\n" 581a7bbbda8SBarry Smith "author = {Kortelainen, M. and Lesinski, T. and Mor\'e, J. and Nazarewicz, W.\n" 582a7bbbda8SBarry Smith " and Sarich, J. and Schunck, N. and Stoitsov, M. V. and Wild, S. },\n" 583a7bbbda8SBarry Smith "journal = {Phys. Rev. C},\n" 584a7bbbda8SBarry Smith "volume = {82},\n" 585a7bbbda8SBarry Smith "number = {2},\n" 586a7bbbda8SBarry Smith "pages = {024313},\n" 587a7bbbda8SBarry Smith "numpages = {18},\n" 588a7bbbda8SBarry Smith "year = {2010},\n" 589a7bbbda8SBarry Smith "month = {Aug},\n" 5909371c9d4SSatish Balay "doi = {10.1103/PhysRevC.82.024313}\n}\n", 5919371c9d4SSatish Balay &set)); 592e6d4cb7fSJason Sarich tao->niter = 0; 593a7e14dcfSSatish Balay if (tao->XL && tao->XU) { 594a7e14dcfSSatish Balay /* Check x0 <= XU */ 595a7e14dcfSSatish Balay PetscReal val; 596ce902467SBarry Smith 5979566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 5989566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 5999566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 6003c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 > upper bound"); 601a7e14dcfSSatish Balay 602a7e14dcfSSatish Balay /* Check x0 >= xl */ 6039566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XL, mfqP->Xhist[0])); 6049566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->solution)); 6059566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 6063c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 < lower bound"); 607f06e3bfaSBarry Smith 608a7e14dcfSSatish Balay /* Check x0 + delta < XU -- should be able to get around this eventually */ 609a7e14dcfSSatish Balay 6109566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->Xhist[0], mfqP->delta)); 6119566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], 1.0, tao->solution)); 6129566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 6139566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 6143c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 + delta > upper bound"); 615a7e14dcfSSatish Balay } 616a7e14dcfSSatish Balay 6179371c9d4SSatish Balay blasm = mfqP->m; 6189371c9d4SSatish Balay blasn = mfqP->n; 6199371c9d4SSatish Balay blasnpmax = mfqP->npmax; 620ce902467SBarry Smith for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0; 621a7e14dcfSSatish Balay 6229566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 623ce902467SBarry Smith 624ce902467SBarry Smith /* This provides enough information to approximate the gradient of the objective */ 625ce902467SBarry Smith /* using a forward difference scheme. */ 626ce902467SBarry Smith 6279566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta)); 6289566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0])); 629a7e14dcfSSatish Balay mfqP->minindex = 0; 630a7e14dcfSSatish Balay minnorm = mfqP->Fres[0]; 631ff2b74efSJason Sarich 6329566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(mfqP->Xhist[0], &low, &high)); 633805332fbSTodd Munson for (i = 1; i < mfqP->n + 1; ++i) { 6349566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[0], mfqP->Xhist[i])); 635ce902467SBarry Smith 636a7e14dcfSSatish Balay if (i - 1 >= low && i - 1 < high) { 6379566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 638a7e14dcfSSatish Balay x[i - 1 - low] += mfqP->delta; 6399566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 640a7e14dcfSSatish Balay } 641a7e14dcfSSatish Balay CHKMEMQ; 6429566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i])); 643a7e14dcfSSatish Balay if (mfqP->Fres[i] < minnorm) { 644a7e14dcfSSatish Balay mfqP->minindex = i; 645a7e14dcfSSatish Balay minnorm = mfqP->Fres[i]; 646a7e14dcfSSatish Balay } 647a7e14dcfSSatish Balay } 6489566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 6499566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 6509566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm)); 651ce902467SBarry Smith 652a7e14dcfSSatish Balay /* Gather mpi vecs to one big local vec */ 653a7e14dcfSSatish Balay 654a7e14dcfSSatish Balay /* Begin serial code */ 655a7e14dcfSSatish Balay 656a7e14dcfSSatish Balay /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 657a7e14dcfSSatish Balay /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 658a7e14dcfSSatish Balay /* (Column oriented for blas calls) */ 659a7e14dcfSSatish Balay ii = 0; 660a7e14dcfSSatish Balay 66163a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Build matrix: %" PetscInt_FMT "\n", (PetscInt)mfqP->size)); 662805332fbSTodd Munson if (1 == mfqP->size) { 6639566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 664a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 6659566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 6669566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 667a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 668a7e14dcfSSatish Balay if (i == mfqP->minindex) continue; 669a7e14dcfSSatish Balay 6709566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 671ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 6729566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 673a7e14dcfSSatish Balay 6749566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Fhist[i], &f)); 675ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; 6769566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Fhist[i], &f)); 67794e8dccaSTodd Munson 678a7e14dcfSSatish Balay mfqP->model_indices[ii++] = i; 679a7e14dcfSSatish Balay } 680ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j]; 6819566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 682a7e14dcfSSatish Balay } else { 6839566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->localxmin, 0)); 6849566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 6859566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 686ce902467SBarry Smith 6879566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->localxmin, &xmint)); 688a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 6899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->localxmin, &xmint)); 690a7e14dcfSSatish Balay 6919566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 6929566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 6939566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->localfmin, &fmin)); 694a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 695a7e14dcfSSatish Balay if (i == mfqP->minindex) continue; 696a7e14dcfSSatish Balay 6979566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 6989566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 6999566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->localx, &x)); 700ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 7019566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->localx, &x)); 702a7e14dcfSSatish Balay 7039566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 7049566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 7059566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->localf, &f)); 706ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; 7079566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->localf, &f)); 70894e8dccaSTodd Munson 70994e8dccaSTodd Munson mfqP->model_indices[ii++] = i; 710a7e14dcfSSatish Balay } 711ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j]; 7129566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->localfmin, &fmin)); 713a7e14dcfSSatish Balay } 714a7e14dcfSSatish Balay 715a7e14dcfSSatish Balay /* Determine the initial quadratic models */ 716a7e14dcfSSatish Balay /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */ 717a7e14dcfSSatish Balay /* D (nxn) Fdiff (nxm) => G (nxm) */ 718125ddc8aSJason Sarich blasncopy = blasn; 719792fecdfSBarry Smith PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info)); 72063a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Linear solve return: %" PetscInt_FMT "\n", (PetscInt)info)); 721a7e14dcfSSatish Balay 7229566063dSJacob Faibussowitsch PetscCall(pounders_update_res(tao)); 723a7e14dcfSSatish Balay 724a7e14dcfSSatish Balay valid = PETSC_TRUE; 725a7e14dcfSSatish Balay 7269566063dSJacob Faibussowitsch PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 7279566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(tao->gradient)); 7289566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(tao->gradient)); 7299566063dSJacob Faibussowitsch PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 730a7e14dcfSSatish Balay gnorm *= mfqP->delta; 7319566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 732763847b4SAlp Dener 733763847b4SAlp Dener tao->reason = TAO_CONTINUE_ITERATING; 7349566063dSJacob Faibussowitsch PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 7359566063dSJacob Faibussowitsch PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 736dbbe0bcdSBarry Smith PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 737763847b4SAlp Dener 738a7e14dcfSSatish Balay mfqP->nHist = mfqP->n + 1; 739a7e14dcfSSatish Balay mfqP->nmodelpoints = mfqP->n + 1; 7409566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm)); 741a7e14dcfSSatish Balay 742763847b4SAlp Dener while (tao->reason == TAO_CONTINUE_ITERATING) { 743a7e14dcfSSatish Balay PetscReal gnm = 1e-4; 744e1e80dc8SAlp Dener /* Call general purpose update function */ 745dbbe0bcdSBarry Smith PetscTryTypeMethod(tao, update, tao->niter, tao->user_update); 7468931d482SJason Sarich tao->niter++; 747ce902467SBarry Smith /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */ 7489566063dSJacob Faibussowitsch PetscCall(gqtwrap(tao, &gnm, &mdec)); 749a7e14dcfSSatish Balay /* Evaluate the function at the new point */ 750a7e14dcfSSatish Balay 751ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i]; 7529566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist])); 7539566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist])); 7549566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES)); 7559566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 7569566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 757a7e14dcfSSatish Balay 7589566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 759125ddc8aSJason Sarich 760a7e14dcfSSatish Balay rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec; 761a7e14dcfSSatish Balay mfqP->nHist++; 762a7e14dcfSSatish Balay 763a7e14dcfSSatish Balay /* Update the center */ 764a7e14dcfSSatish Balay if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) { 765a7e14dcfSSatish Balay /* Update model to reflect new base point */ 766ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta; 767a7e14dcfSSatish Balay for (j = 0; j < mfqP->m; j++) { 768a7e14dcfSSatish Balay /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work'; 769a7e14dcfSSatish Balay G(:,j) = G(:,j) + H(:,:,j)*work' */ 770a7e14dcfSSatish Balay for (k = 0; k < mfqP->n; k++) { 771a7e14dcfSSatish Balay mfqP->work2[k] = 0.0; 772ad540459SPierre Jolivet for (l = 0; l < mfqP->n; l++) mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l]; 773a7e14dcfSSatish Balay } 774a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 775a7e14dcfSSatish Balay mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]); 776a7e14dcfSSatish Balay mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i]; 777a7e14dcfSSatish Balay } 778a7e14dcfSSatish Balay } 779a7e14dcfSSatish Balay /* Cres += work*Gres + .5*work*Hres*work'; 780a7e14dcfSSatish Balay Gres += Hres*work'; */ 781a7e14dcfSSatish Balay 782792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione)); 783ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] += mfqP->work2[i]; 784a7e14dcfSSatish Balay mfqP->minindex = mfqP->nHist - 1; 785a7e14dcfSSatish Balay minnorm = mfqP->Fres[mfqP->minindex]; 7869566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 787a7e14dcfSSatish Balay /* Change current center */ 7889566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 789ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 7909566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 791a7e14dcfSSatish Balay } 792a7e14dcfSSatish Balay 793a7e14dcfSSatish Balay /* Evaluate at a model-improving point if necessary */ 794a7e14dcfSSatish Balay if (valid == PETSC_FALSE) { 795a7e14dcfSSatish Balay mfqP->q_is_I = 1; 7968a35fac9SJason Sarich mfqP->nmodelpoints = 0; 7979566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 798a7e14dcfSSatish Balay if (mfqP->nmodelpoints < mfqP->n) { 7999566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Model not valid -- model-improving\n")); 8009566063dSJacob Faibussowitsch PetscCall(modelimprove(tao, mfqP, 1)); 801a7e14dcfSSatish Balay } 802a7e14dcfSSatish Balay } 803a7e14dcfSSatish Balay 804a7e14dcfSSatish Balay /* Update the trust region radius */ 805a7e14dcfSSatish Balay deltaold = mfqP->delta; 806a7e14dcfSSatish Balay normxsp = 0; 807ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i]; 808a7e14dcfSSatish Balay normxsp = PetscSqrtReal(normxsp); 809a7e14dcfSSatish Balay if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) { 810a7e14dcfSSatish Balay mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax); 811a7e14dcfSSatish Balay } else if (valid == PETSC_TRUE) { 812a7e14dcfSSatish Balay mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin); 813a7e14dcfSSatish Balay } 814a7e14dcfSSatish Balay 815a7e14dcfSSatish Balay /* Compute the next interpolation set */ 816a7e14dcfSSatish Balay mfqP->q_is_I = 1; 817a7e14dcfSSatish Balay mfqP->nmodelpoints = 0; 8189566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1)); 8199566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 820a7e14dcfSSatish Balay if (mfqP->nmodelpoints == mfqP->n) { 821a7e14dcfSSatish Balay valid = PETSC_TRUE; 822a7e14dcfSSatish Balay } else { 823a7e14dcfSSatish Balay valid = PETSC_FALSE; 8249566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2)); 8259566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c2)); 826a7e14dcfSSatish Balay if (mfqP->n > mfqP->nmodelpoints) { 8279566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Model not valid -- adding geometry points\n")); 8289566063dSJacob Faibussowitsch PetscCall(modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints)); 829a7e14dcfSSatish Balay } 830a7e14dcfSSatish Balay } 831ad540459SPierre Jolivet for (i = mfqP->nmodelpoints; i > 0; i--) mfqP->model_indices[i] = mfqP->model_indices[i - 1]; 832a7e14dcfSSatish Balay mfqP->nmodelpoints++; 833a7e14dcfSSatish Balay mfqP->model_indices[0] = mfqP->minindex; 8349566063dSJacob Faibussowitsch PetscCall(morepoints(mfqP)); 835a7e14dcfSSatish Balay for (i = 0; i < mfqP->nmodelpoints; i++) { 8369566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 837ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold; 8389566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 8399566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 840a7e14dcfSSatish Balay for (j = 0; j < mfqP->m; j++) { 841a7e14dcfSSatish Balay for (k = 0; k < mfqP->n; k++) { 842a7e14dcfSSatish Balay mfqP->work[k] = 0.0; 843ad540459SPierre Jolivet for (l = 0; l < mfqP->n; l++) mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l]; 844a7e14dcfSSatish Balay } 845792fecdfSBarry Smith PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]); 846a7e14dcfSSatish Balay } 8479566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 848a7e14dcfSSatish Balay } 849a7e14dcfSSatish Balay 850a7e14dcfSSatish Balay /* Update the quadratic model */ 85163a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints)); 8529566063dSJacob Faibussowitsch PetscCall(getquadpounders(mfqP)); 8539566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 854792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione)); 855a7e14dcfSSatish Balay /* G = G*(delta/deltaold) + Gdel */ 856a7e14dcfSSatish Balay ratio = mfqP->delta / deltaold; 857a7e14dcfSSatish Balay iblas = blasm * blasn; 858792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione)); 859792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione)); 860125ddc8aSJason Sarich /* H = H*(delta/deltaold)^2 + Hdel */ 861a7e14dcfSSatish Balay iblas = blasm * blasn * blasn; 862a7e14dcfSSatish Balay ratio *= ratio; 863792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione)); 864792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione)); 865a7e14dcfSSatish Balay 866a7e14dcfSSatish Balay /* Get residuals */ 8679566063dSJacob Faibussowitsch PetscCall(pounders_update_res(tao)); 868a7e14dcfSSatish Balay 869a7e14dcfSSatish Balay /* Export solution and gradient residual to TAO */ 8709566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 8719566063dSJacob Faibussowitsch PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 8729566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(tao->gradient)); 8739566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(tao->gradient)); 8749566063dSJacob Faibussowitsch PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 875a7e14dcfSSatish Balay gnorm *= mfqP->delta; 876a7e14dcfSSatish Balay /* final criticality test */ 8779566063dSJacob Faibussowitsch PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 8789566063dSJacob Faibussowitsch PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 879dbbe0bcdSBarry Smith PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 880ff2b74efSJason Sarich /* test for repeated model */ 881c9376581SJason Sarich if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) { 882c9376581SJason Sarich same = PETSC_TRUE; 883c9376581SJason Sarich } else { 884c9376581SJason Sarich same = PETSC_FALSE; 885c9376581SJason Sarich } 886ff2b74efSJason Sarich for (i = 0; i < mfqP->nmodelpoints; i++) { 887c9376581SJason Sarich if (same) { 888c9376581SJason Sarich if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) { 889c9376581SJason Sarich same = PETSC_TRUE; 890c9376581SJason Sarich } else { 891c9376581SJason Sarich same = PETSC_FALSE; 892c9376581SJason Sarich } 893c9376581SJason Sarich } 894ff2b74efSJason Sarich mfqP->last_model_indices[i] = mfqP->model_indices[i]; 895ff2b74efSJason Sarich } 896ff2b74efSJason Sarich mfqP->last_nmodelpoints = mfqP->nmodelpoints; 897ff2b74efSJason Sarich if (same && mfqP->delta == deltaold) { 8989566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Identical model used in successive iterations\n")); 8995d72676cSJason Sarich tao->reason = TAO_CONVERGED_STEPTOL; 900ff2b74efSJason Sarich } 901a7e14dcfSSatish Balay } 902a7e14dcfSSatish Balay PetscFunctionReturn(0); 903a7e14dcfSSatish Balay } 904a7e14dcfSSatish Balay 905*d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoSetUp_POUNDERS(Tao tao) 906*d71ae5a4SJacob Faibussowitsch { 907a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 908125ddc8aSJason Sarich PetscInt i, j; 909a7e14dcfSSatish Balay IS isfloc, isfglob, isxloc, isxglob; 910a7e14dcfSSatish Balay 911a7e14dcfSSatish Balay PetscFunctionBegin; 9129566063dSJacob Faibussowitsch if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient)); 9139566063dSJacob Faibussowitsch if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection)); 9149566063dSJacob Faibussowitsch PetscCall(VecGetSize(tao->solution, &mfqP->n)); 9159566063dSJacob Faibussowitsch PetscCall(VecGetSize(tao->ls_res, &mfqP->m)); 916a7e14dcfSSatish Balay mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n); 917ad540459SPierre Jolivet if (mfqP->npmax == PETSC_DEFAULT) mfqP->npmax = 2 * mfqP->n + 1; 918a7e14dcfSSatish Balay mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax); 919a7e14dcfSSatish Balay mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2); 920a7e14dcfSSatish Balay 9219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist)); 9229566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist)); 923a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 9249566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[i])); 9259566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[i])); 926a7e14dcfSSatish Balay } 9279566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->workxvec)); 9289566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->workfvec)); 929a7e14dcfSSatish Balay mfqP->nHist = 0; 930a7e14dcfSSatish Balay 9319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres)); 9329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES)); 9339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work)); 9349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work2)); 9359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work3)); 9369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork)); 9379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega)); 9389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta)); 9399566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n + 1, &mfqP->alpha)); 940a7e14dcfSSatish Balay 9419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H)); 9429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q)); 9439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp)); 9449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L)); 9459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp)); 9469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save)); 9479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N)); 9489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M)); 9499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z)); 9509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau)); 9519566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp)); 952a7e14dcfSSatish Balay mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2); 9539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork)); 9549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork)); 9559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->xmin)); 9569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m, &mfqP->C)); 9579566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff)); 9589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp)); 9599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->Gres)); 9609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres)); 9619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints)); 9629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->model_indices)); 9639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices)); 9649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->Xsubproblem)); 9659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel)); 9669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel)); 9679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices)); 9689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->iwork)); 9699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w)); 970125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 971125ddc8aSJason Sarich for (j = 0; j < mfqP->m; j++) { 972125ddc8aSJason Sarich if (i == j) { 973125ddc8aSJason Sarich mfqP->w[i + mfqP->m * j] = 1.0; 974125ddc8aSJason Sarich } else { 975125ddc8aSJason Sarich mfqP->w[i + mfqP->m * j] = 0.0; 976125ddc8aSJason Sarich } 977125ddc8aSJason Sarich } 978125ddc8aSJason Sarich } 979ad540459SPierre Jolivet for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) mfqP->indices[i] = i; 9809566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size)); 981f40bd260SBarry Smith if (mfqP->size > 1) { 9829566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx)); 9839566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin)); 9849566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf)); 9859566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin)); 9869566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc)); 9879566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob)); 9889566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc)); 9899566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob)); 990a7e14dcfSSatish Balay 9919566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx)); 9929566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf)); 993a7e14dcfSSatish Balay 9949566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isxloc)); 9959566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isxglob)); 9969566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isfloc)); 9979566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isfglob)); 998a7e14dcfSSatish Balay } 999a7e14dcfSSatish Balay 1000a7e14dcfSSatish Balay if (!mfqP->usegqt) { 1001a7e14dcfSSatish Balay KSP ksp; 1002a7e14dcfSSatish Balay PC pc; 10039566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx)); 10049566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl)); 10059566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subb)); 10069566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subxu)); 10079566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subpdel)); 10089566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subndel)); 10099566063dSJacob Faibussowitsch PetscCall(TaoCreate(PETSC_COMM_SELF, &mfqP->subtao)); 10109566063dSJacob Faibussowitsch PetscCall(PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1)); 10119566063dSJacob Faibussowitsch PetscCall(TaoSetType(mfqP->subtao, TAOBNTR)); 10129566063dSJacob Faibussowitsch PetscCall(TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_")); 10139566063dSJacob Faibussowitsch PetscCall(TaoSetSolution(mfqP->subtao, mfqP->subx)); 10149566063dSJacob Faibussowitsch PetscCall(TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP)); 10159566063dSJacob Faibussowitsch PetscCall(TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits)); 10169566063dSJacob Faibussowitsch PetscCall(TaoSetFromOptions(mfqP->subtao)); 10179566063dSJacob Faibussowitsch PetscCall(TaoGetKSP(mfqP->subtao, &ksp)); 1018a7e14dcfSSatish Balay if (ksp) { 10199566063dSJacob Faibussowitsch PetscCall(KSPGetPC(ksp, &pc)); 10209566063dSJacob Faibussowitsch PetscCall(PCSetType(pc, PCNONE)); 1021a7e14dcfSSatish Balay } 10229566063dSJacob Faibussowitsch PetscCall(TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu)); 10239566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH)); 10249566063dSJacob Faibussowitsch PetscCall(TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP)); 1025a7e14dcfSSatish Balay } 1026a7e14dcfSSatish Balay PetscFunctionReturn(0); 1027a7e14dcfSSatish Balay } 1028a7e14dcfSSatish Balay 1029*d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoDestroy_POUNDERS(Tao tao) 1030*d71ae5a4SJacob Faibussowitsch { 1031a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1032a7e14dcfSSatish Balay PetscInt i; 1033a7e14dcfSSatish Balay 1034a7e14dcfSSatish Balay PetscFunctionBegin; 1035a7e14dcfSSatish Balay if (!mfqP->usegqt) { 10369566063dSJacob Faibussowitsch PetscCall(TaoDestroy(&mfqP->subtao)); 10379566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subx)); 10389566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subxl)); 10399566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subxu)); 10409566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subb)); 10419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subpdel)); 10429566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subndel)); 10439566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mfqP->subH)); 1044a7e14dcfSSatish Balay } 10459566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fres)); 10469566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->RES)); 10479566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work)); 10489566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work2)); 10499566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work3)); 10509566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->mwork)); 10519566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->omega)); 10529566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->beta)); 10539566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->alpha)); 10549566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->H)); 10559566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Q)); 10569566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Q_tmp)); 10579566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L)); 10589566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L_tmp)); 10599566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L_save)); 10609566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->N)); 10619566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->M)); 10629566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Z)); 10639566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->tau)); 10649566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->tau_tmp)); 10659566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->npmaxwork)); 10669566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->npmaxiwork)); 10679566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->xmin)); 10689566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->C)); 10699566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fdiff)); 10709566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Disp)); 10719566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gres)); 10729566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Hres)); 10739566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gpoints)); 10749566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->model_indices)); 10759566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->last_model_indices)); 10769566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Xsubproblem)); 10779566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gdel)); 10789566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Hdel)); 10799566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->indices)); 10809566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->iwork)); 10819566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->w)); 1082a7e14dcfSSatish Balay for (i = 0; i < mfqP->nHist; i++) { 10839566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->Xhist[i])); 10849566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->Fhist[i])); 1085a7e14dcfSSatish Balay } 10869566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->workxvec)); 10879566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->workfvec)); 10889566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Xhist)); 10899566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fhist)); 1090a7e14dcfSSatish Balay 1091f40bd260SBarry Smith if (mfqP->size > 1) { 10929566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localx)); 10939566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localxmin)); 10949566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localf)); 10959566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localfmin)); 1096a7e14dcfSSatish Balay } 10979566063dSJacob Faibussowitsch PetscCall(PetscFree(tao->data)); 1098a7e14dcfSSatish Balay PetscFunctionReturn(0); 1099a7e14dcfSSatish Balay } 1100a7e14dcfSSatish Balay 1101*d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems *PetscOptionsObject) 1102*d71ae5a4SJacob Faibussowitsch { 1103a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1104a7e14dcfSSatish Balay 1105a7e14dcfSSatish Balay PetscFunctionBegin; 1106d0609cedSBarry Smith PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization"); 11079566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL)); 1108ff2b74efSJason Sarich mfqP->delta = mfqP->delta0; 11099566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL)); 11109566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL)); 1111d0609cedSBarry Smith PetscOptionsHeadEnd(); 1112a7e14dcfSSatish Balay PetscFunctionReturn(0); 1113a7e14dcfSSatish Balay } 1114a7e14dcfSSatish Balay 1115*d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer) 1116*d71ae5a4SJacob Faibussowitsch { 1117a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1118a7e14dcfSSatish Balay PetscBool isascii; 1119a7e14dcfSSatish Balay 1120a7e14dcfSSatish Balay PetscFunctionBegin; 11219566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1122a7e14dcfSSatish Balay if (isascii) { 11239566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0)); 11249566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta)); 112563a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints)); 1126a7e14dcfSSatish Balay if (mfqP->usegqt) { 11279566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n")); 1128a7e14dcfSSatish Balay } else { 11299566063dSJacob Faibussowitsch PetscCall(TaoView(mfqP->subtao, viewer)); 1130a7e14dcfSSatish Balay } 1131a7e14dcfSSatish Balay } 1132a7e14dcfSSatish Balay PetscFunctionReturn(0); 1133a7e14dcfSSatish Balay } 11341eb8069cSJason Sarich /*MC 11351eb8069cSJason Sarich TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares 11361eb8069cSJason Sarich 11371eb8069cSJason Sarich Options Database Keys: 11381eb8069cSJason Sarich + -tao_pounders_delta - initial step length 11391eb8069cSJason Sarich . -tao_pounders_npmax - maximum number of points in model 11401eb8069cSJason Sarich - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON 11411eb8069cSJason Sarich 11421eb8069cSJason Sarich Level: beginner 11431eb8069cSJason Sarich 11441eb8069cSJason Sarich M*/ 1145a7e14dcfSSatish Balay 1146*d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao) 1147*d71ae5a4SJacob Faibussowitsch { 1148a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1149a7e14dcfSSatish Balay 1150a7e14dcfSSatish Balay PetscFunctionBegin; 1151a7e14dcfSSatish Balay tao->ops->setup = TaoSetUp_POUNDERS; 1152a7e14dcfSSatish Balay tao->ops->solve = TaoSolve_POUNDERS; 1153a7e14dcfSSatish Balay tao->ops->view = TaoView_POUNDERS; 1154a7e14dcfSSatish Balay tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS; 1155a7e14dcfSSatish Balay tao->ops->destroy = TaoDestroy_POUNDERS; 1156a7e14dcfSSatish Balay 11574dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mfqP)); 1158a7e14dcfSSatish Balay tao->data = (void *)mfqP; 11596552cf8aSJason Sarich /* Override default settings (unless already changed) */ 11606552cf8aSJason Sarich if (!tao->max_it_changed) tao->max_it = 2000; 11616552cf8aSJason Sarich if (!tao->max_funcs_changed) tao->max_funcs = 4000; 1162f553c655STodd Munson mfqP->npmax = PETSC_DEFAULT; 1163ff2b74efSJason Sarich mfqP->delta0 = 0.1; 1164a7e14dcfSSatish Balay mfqP->delta = 0.1; 1165a7e14dcfSSatish Balay mfqP->deltamax = 1e3; 1166125ddc8aSJason Sarich mfqP->deltamin = 1e-6; 1167a8bd9cb3STodd Munson mfqP->c2 = 10.0; 1168a7e14dcfSSatish Balay mfqP->theta1 = 1e-5; 1169a7e14dcfSSatish Balay mfqP->theta2 = 1e-4; 1170a7e14dcfSSatish Balay mfqP->gamma0 = 0.5; 1171a7e14dcfSSatish Balay mfqP->gamma1 = 2.0; 1172a7e14dcfSSatish Balay mfqP->eta0 = 0.0; 1173a7e14dcfSSatish Balay mfqP->eta1 = 0.1; 1174f553c655STodd Munson mfqP->usegqt = PETSC_FALSE; 1175a7e14dcfSSatish Balay mfqP->gqt_rtol = 0.001; 1176a7e14dcfSSatish Balay mfqP->gqt_maxits = 50; 117783c8fe1dSLisandro Dalcin mfqP->workxvec = NULL; 1178a7e14dcfSSatish Balay PetscFunctionReturn(0); 1179a7e14dcfSSatish Balay } 1180