1aaa7dc30SBarry Smith #include <../src/tao/leastsquares/impls/pounders/pounders.h> 2a7e14dcfSSatish Balay 3d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, void *ctx) 4d71ae5a4SJacob Faibussowitsch { 5a7e14dcfSSatish Balay PetscFunctionBegin; 63ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7a7e14dcfSSatish Balay } 8ca88a2e0STodd Munson 9d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, void *ctx) 10d71ae5a4SJacob Faibussowitsch { 11a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx; 12a7e14dcfSSatish Balay PetscReal d1, d2; 13f06e3bfaSBarry Smith 14a7e14dcfSSatish Balay PetscFunctionBegin; 15a7e14dcfSSatish Balay /* g = A*x (add b later)*/ 169566063dSJacob Faibussowitsch PetscCall(MatMult(mfqP->subH, x, g)); 17a7e14dcfSSatish Balay 18a7e14dcfSSatish Balay /* f = 1/2 * x'*(Ax) + b'*x */ 199566063dSJacob Faibussowitsch PetscCall(VecDot(x, g, &d1)); 209566063dSJacob Faibussowitsch PetscCall(VecDot(mfqP->subb, x, &d2)); 21a7e14dcfSSatish Balay *f = 0.5 * d1 + d2; 22a7e14dcfSSatish Balay 23a7e14dcfSSatish Balay /* now g = g + b */ 249566063dSJacob Faibussowitsch PetscCall(VecAXPY(g, 1.0, mfqP->subb)); 253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26a7e14dcfSSatish Balay } 27ca88a2e0STodd Munson 28d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum) 29d71ae5a4SJacob Faibussowitsch { 30125ddc8aSJason Sarich TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 31125ddc8aSJason Sarich PetscInt i, row, col; 32125ddc8aSJason Sarich PetscReal fr, fc; 33691b26d3SBarry Smith 34125ddc8aSJason Sarich PetscFunctionBegin; 359566063dSJacob Faibussowitsch PetscCall(TaoComputeResidual(tao, x, F)); 36737f463aSAlp Dener if (tao->res_weights_v) { 379566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F)); 389566063dSJacob Faibussowitsch PetscCall(VecDot(mfqP->workfvec, mfqP->workfvec, fsum)); 39737f463aSAlp Dener } else if (tao->res_weights_w) { 40125ddc8aSJason Sarich *fsum = 0; 41737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 42737f463aSAlp Dener row = tao->res_weights_rows[i]; 43737f463aSAlp Dener col = tao->res_weights_cols[i]; 449566063dSJacob Faibussowitsch PetscCall(VecGetValues(F, 1, &row, &fr)); 459566063dSJacob Faibussowitsch PetscCall(VecGetValues(F, 1, &col, &fc)); 46737f463aSAlp Dener *fsum += tao->res_weights_w[i] * fc * fr; 47125ddc8aSJason Sarich } 48125ddc8aSJason Sarich } else { 499566063dSJacob Faibussowitsch PetscCall(VecDot(F, F, fsum)); 50125ddc8aSJason Sarich } 519566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum)); 523c859ba3SBarry Smith PetscCheck(!PetscIsInfOrNanReal(*fsum), PETSC_COMM_SELF, PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 54125ddc8aSJason Sarich } 55f06e3bfaSBarry Smith 56d71ae5a4SJacob Faibussowitsch static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin) 57d71ae5a4SJacob Faibussowitsch { 586f4723b1SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 596f4723b1SBarry Smith PetscReal atol = 1.0e-5; 606f4723b1SBarry Smith #else 61a7e14dcfSSatish Balay PetscReal atol = 1.0e-10; 626f4723b1SBarry Smith #endif 63a7e14dcfSSatish Balay PetscInt info, its; 64a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 65a7e14dcfSSatish Balay 66a7e14dcfSSatish Balay PetscFunctionBegin; 67a7e14dcfSSatish Balay if (!mfqP->usegqt) { 68a7e14dcfSSatish Balay PetscReal maxval; 69a7e14dcfSSatish Balay PetscInt i, j; 70a7e14dcfSSatish Balay 719566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 729566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->subb)); 739566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->subb)); 74a7e14dcfSSatish Balay 759566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subx, 0.0)); 76a7e14dcfSSatish Balay 779566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subndel, -1.0)); 789566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subpdel, +1.0)); 79a7e14dcfSSatish Balay 80ce902467SBarry Smith /* Complete the lower triangle of the Hessian matrix */ 81a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 82ad540459SPierre Jolivet for (j = i + 1; j < mfqP->n; j++) mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i]; 83a7e14dcfSSatish Balay } 849566063dSJacob Faibussowitsch PetscCall(MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES)); 859566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY)); 869566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY)); 87a7e14dcfSSatish Balay 889566063dSJacob Faibussowitsch PetscCall(TaoResetStatistics(mfqP->subtao)); 899566063dSJacob Faibussowitsch /* PetscCall(TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_DEFAULT)); */ 90a7e14dcfSSatish Balay /* enforce bound constraints -- experimental */ 91a7e14dcfSSatish Balay if (tao->XU && tao->XL) { 929566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XU, mfqP->subxu)); 939566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subxu, -1.0, tao->solution)); 949566063dSJacob Faibussowitsch PetscCall(VecScale(mfqP->subxu, 1.0 / mfqP->delta)); 959566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XL, mfqP->subxl)); 969566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subxl, -1.0, tao->solution)); 979566063dSJacob Faibussowitsch PetscCall(VecScale(mfqP->subxl, 1.0 / mfqP->delta)); 98a7e14dcfSSatish Balay 999566063dSJacob Faibussowitsch PetscCall(VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel)); 1009566063dSJacob Faibussowitsch PetscCall(VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel)); 101a7e14dcfSSatish Balay } else { 1029566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subpdel, mfqP->subxu)); 1039566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subndel, mfqP->subxl)); 104a7e14dcfSSatish Balay } 105a7e14dcfSSatish Balay /* Make sure xu > xl */ 1069566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1079566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1089566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1093c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "upper bound < lower bound in subproblem"); 110a7e14dcfSSatish Balay /* Make sure xu > tao->solution > xl */ 1119566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1129566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 1139566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1143c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess < lower bound in subproblem"); 115a7e14dcfSSatish Balay 1169566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 1179566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1189566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1193c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess > upper bound in subproblem"); 120a7e14dcfSSatish Balay 1219566063dSJacob Faibussowitsch PetscCall(TaoSolve(mfqP->subtao)); 1229566063dSJacob Faibussowitsch PetscCall(TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL)); 123a7e14dcfSSatish Balay 124a7e14dcfSSatish Balay /* test bounds post-solution*/ 1259566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1269566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 1279566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 128a7e14dcfSSatish Balay if (maxval > 1e-5) { 1299566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "subproblem solution < lower bound\n")); 130a7e14dcfSSatish Balay tao->reason = TAO_DIVERGED_TR_REDUCTION; 131a7e14dcfSSatish Balay } 132a7e14dcfSSatish Balay 1339566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 1349566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1359566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 136a7e14dcfSSatish Balay if (maxval > 1e-5) { 1379566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "subproblem solution > upper bound\n")); 138a7e14dcfSSatish Balay tao->reason = TAO_DIVERGED_TR_REDUCTION; 139a7e14dcfSSatish Balay } 140a7e14dcfSSatish Balay } else { 1413ba16761SJacob Faibussowitsch PetscCall(gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3)); 142a7e14dcfSSatish Balay } 143a7e14dcfSSatish Balay *qmin *= -1; 1443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 145a7e14dcfSSatish Balay } 146a7e14dcfSSatish Balay 147d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_update_res(Tao tao) 148d71ae5a4SJacob Faibussowitsch { 149125ddc8aSJason Sarich TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 150125ddc8aSJason Sarich PetscInt i, row, col; 151125ddc8aSJason Sarich PetscBLASInt blasn = mfqP->n, blasn2 = blasn * blasn, blasm = mfqP->m, ione = 1; 1526c95174dSJason Sarich PetscReal zero = 0.0, one = 1.0, wii, factor; 153125ddc8aSJason Sarich 154691b26d3SBarry Smith PetscFunctionBegin; 155ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] = 0; 156ad540459SPierre Jolivet for (i = 0; i < mfqP->n * mfqP->n; i++) mfqP->Hres[i] = 0; 157125ddc8aSJason Sarich 158125ddc8aSJason Sarich /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */ 159737f463aSAlp Dener if (tao->res_weights_v) { 1606c95174dSJason Sarich /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */ 161125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 1629566063dSJacob Faibussowitsch PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &factor)); 1636c95174dSJason Sarich factor = factor * mfqP->C[i]; 164792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione)); 165125ddc8aSJason Sarich } 166125ddc8aSJason Sarich 167125ddc8aSJason Sarich /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 1686c95174dSJason Sarich /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/ 169125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 1709566063dSJacob Faibussowitsch PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &wii)); 171125ddc8aSJason Sarich if (tao->niter > 1) { 172125ddc8aSJason Sarich factor = wii * mfqP->C[i]; 1736c95174dSJason Sarich /* add wii * ci * Hi */ 174792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 175125ddc8aSJason Sarich } 1766c95174dSJason Sarich /* add wii * gi * gi' */ 177792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn)); 178125ddc8aSJason Sarich } 179737f463aSAlp Dener } else if (tao->res_weights_w) { 180ce902467SBarry Smith /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */ 181737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 182737f463aSAlp Dener row = tao->res_weights_rows[i]; 183737f463aSAlp Dener col = tao->res_weights_cols[i]; 184ce902467SBarry Smith 185737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 186792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione)); 187737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 188792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione)); 189ce902467SBarry Smith } 190ce902467SBarry Smith 191ce902467SBarry Smith /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 1926c95174dSJason Sarich /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 193737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 194737f463aSAlp Dener row = tao->res_weights_rows[i]; 195737f463aSAlp Dener col = tao->res_weights_cols[i]; 196737f463aSAlp Dener factor = tao->res_weights_w[i] / 2.0; 197125ddc8aSJason Sarich /* add wij * gi gj' + wij * gj gi' */ 198792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn)); 199792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn)); 200125ddc8aSJason Sarich } 201125ddc8aSJason Sarich if (tao->niter > 1) { 202737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 203737f463aSAlp Dener row = tao->res_weights_rows[i]; 204737f463aSAlp Dener col = tao->res_weights_cols[i]; 205125ddc8aSJason Sarich 206125ddc8aSJason Sarich /* add wij*cj*Hi */ 207737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 208792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione)); 209125ddc8aSJason Sarich 210125ddc8aSJason Sarich /* add wij*ci*Hj */ 211737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 212792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione)); 213125ddc8aSJason Sarich } 214125ddc8aSJason Sarich } 215125ddc8aSJason Sarich } else { 216ce902467SBarry Smith /* Default: Gres= sum_i[cigi] = G*c' */ 2179566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Identity weights\n")); 218792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione)); 219ce902467SBarry Smith 220ce902467SBarry Smith /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 2216c95174dSJason Sarich /* Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)} */ 222792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn)); 223125ddc8aSJason Sarich 224125ddc8aSJason Sarich /* sum(F(xkin,i)*H(:,:,i)) */ 225125ddc8aSJason Sarich if (tao->niter > 1) { 226125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 2276c95174dSJason Sarich factor = mfqP->C[i]; 228792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 229125ddc8aSJason Sarich } 230125ddc8aSJason Sarich } 231125ddc8aSJason Sarich } 2323ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 233125ddc8aSJason Sarich } 234ca88a2e0STodd Munson 235d71ae5a4SJacob Faibussowitsch static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi) 236d71ae5a4SJacob Faibussowitsch { 237a7e14dcfSSatish Balay /* Phi = .5*[x(1)^2 sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */ 238a7e14dcfSSatish Balay PetscInt i, j, k; 239a7e14dcfSSatish Balay PetscReal sqrt2 = PetscSqrtReal(2.0); 240f06e3bfaSBarry Smith 241a7e14dcfSSatish Balay PetscFunctionBegin; 242a7e14dcfSSatish Balay j = 0; 243a7e14dcfSSatish Balay for (i = 0; i < n; i++) { 244a7e14dcfSSatish Balay phi[j] = 0.5 * x[i] * x[i]; 245a7e14dcfSSatish Balay j++; 246a7e14dcfSSatish Balay for (k = i + 1; k < n; k++) { 247a7e14dcfSSatish Balay phi[j] = x[i] * x[k] / sqrt2; 248a7e14dcfSSatish Balay j++; 249a7e14dcfSSatish Balay } 250a7e14dcfSSatish Balay } 2513ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 252a7e14dcfSSatish Balay } 253a7e14dcfSSatish Balay 254d71ae5a4SJacob Faibussowitsch static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP) 255d71ae5a4SJacob Faibussowitsch { 256a7e14dcfSSatish Balay /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x' 257a7e14dcfSSatish Balay that satisfies the interpolation conditions Q(X[:,j]) = f(j) 258a7e14dcfSSatish Balay for j=1,...,m and with a Hessian matrix of least Frobenius norm */ 259a7e14dcfSSatish Balay 260a7e14dcfSSatish Balay /* NB --we are ignoring c */ 261a7e14dcfSSatish Balay PetscInt i, j, k, num, np = mfqP->nmodelpoints; 262a7e14dcfSSatish Balay PetscReal one = 1.0, zero = 0.0, negone = -1.0; 263a7e14dcfSSatish Balay PetscBLASInt blasnpmax = mfqP->npmax; 264a7e14dcfSSatish Balay PetscBLASInt blasnplus1 = mfqP->n + 1; 265a7e14dcfSSatish Balay PetscBLASInt blasnp = np; 266a7e14dcfSSatish Balay PetscBLASInt blasint = mfqP->n * (mfqP->n + 1) / 2; 267a7e14dcfSSatish Balay PetscBLASInt blasint2 = np - mfqP->n - 1; 268f06e3bfaSBarry Smith PetscBLASInt info, ione = 1; 269a7e14dcfSSatish Balay PetscReal sqrt2 = PetscSqrtReal(2.0); 270a7e14dcfSSatish Balay 271a7e14dcfSSatish Balay PetscFunctionBegin; 272ad540459SPierre Jolivet for (i = 0; i < mfqP->n * mfqP->m; i++) mfqP->Gdel[i] = 0; 273ad540459SPierre Jolivet for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) mfqP->Hdel[i] = 0; 274a7e14dcfSSatish Balay 275a7e14dcfSSatish Balay /* factor M */ 276792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info)); 27763a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrf returned with value %" PetscBLASInt_FMT, info); 278a7e14dcfSSatish Balay 279a7e14dcfSSatish Balay if (np == mfqP->n + 1) { 280ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) mfqP->omega[i] = 0.0; 281ad540459SPierre Jolivet for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->beta[i] = 0.0; 282a7e14dcfSSatish Balay } else { 283a7e14dcfSSatish Balay /* Let Ltmp = (L'*L) */ 284792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint)); 285a7e14dcfSSatish Balay 286a7e14dcfSSatish Balay /* factor Ltmp */ 287792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info)); 28863a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrf returned with value %" PetscBLASInt_FMT, info); 289a7e14dcfSSatish Balay } 290a7e14dcfSSatish Balay 291a7e14dcfSSatish Balay for (k = 0; k < mfqP->m; k++) { 292a7e14dcfSSatish Balay if (np != mfqP->n + 1) { 293a7e14dcfSSatish Balay /* Solve L'*L*Omega = Z' * RESk*/ 294792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione)); 295792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info)); 29663a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrs returned with value %" PetscBLASInt_FMT, info); 297a7e14dcfSSatish Balay 298a7e14dcfSSatish Balay /* Beta = L*Omega */ 299792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione)); 300a7e14dcfSSatish Balay } 301a7e14dcfSSatish Balay 302a7e14dcfSSatish Balay /* solve M'*Alpha = RESk - N'*Beta */ 303792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione)); 304792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info)); 30563a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrs returned with value %" PetscBLASInt_FMT, info); 306a7e14dcfSSatish Balay 307a7e14dcfSSatish Balay /* Gdel(:,k) = Alpha(2:n+1) */ 308ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1]; 309a7e14dcfSSatish Balay 310a7e14dcfSSatish Balay /* Set Hdels */ 311a7e14dcfSSatish Balay num = 0; 312a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 313a7e14dcfSSatish Balay /* H[i,i,k] = Beta(num) */ 314a7e14dcfSSatish Balay mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num]; 315a7e14dcfSSatish Balay num++; 316a7e14dcfSSatish Balay for (j = i + 1; j < mfqP->n; j++) { 317a7e14dcfSSatish Balay /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */ 318a7e14dcfSSatish Balay mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 319a7e14dcfSSatish Balay mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 320a7e14dcfSSatish Balay num++; 321a7e14dcfSSatish Balay } 322a7e14dcfSSatish Balay } 323a7e14dcfSSatish Balay } 3243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 325a7e14dcfSSatish Balay } 326a7e14dcfSSatish Balay 327d71ae5a4SJacob Faibussowitsch static PetscErrorCode morepoints(TAO_POUNDERS *mfqP) 328d71ae5a4SJacob Faibussowitsch { 329a7e14dcfSSatish Balay /* Assumes mfqP->model_indices[0] is minimum index 330a7e14dcfSSatish Balay Finishes adding points to mfqP->model_indices (up to npmax) 331a7e14dcfSSatish Balay Computes L,Z,M,N 332a7e14dcfSSatish Balay np is actual number of points in model (should equal npmax?) */ 333a7e14dcfSSatish Balay PetscInt point, i, j, offset; 334a7e14dcfSSatish Balay PetscInt reject; 335f06e3bfaSBarry Smith PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blasnplus1 = mfqP->n + 1, info, blasnmax = mfqP->nmax, blasint, blasint2, blasnp, blasmaxmn; 3365e081366SBarry Smith const PetscReal *x; 3375e081366SBarry Smith PetscReal normd; 338a7e14dcfSSatish Balay 339f06e3bfaSBarry Smith PetscFunctionBegin; 340a7e14dcfSSatish Balay /* Initialize M,N */ 341a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 3429566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 343a7e14dcfSSatish Balay mfqP->M[(mfqP->n + 1) * i] = 1.0; 344ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 3459566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 3469566063dSJacob Faibussowitsch PetscCall(phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i])); 347a7e14dcfSSatish Balay } 348a7e14dcfSSatish Balay 349a7e14dcfSSatish Balay /* Now we add points until we have npmax starting with the most recent ones */ 350a7e14dcfSSatish Balay point = mfqP->nHist - 1; 351a7e14dcfSSatish Balay mfqP->nmodelpoints = mfqP->n + 1; 352a7e14dcfSSatish Balay while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) { 353a7e14dcfSSatish Balay /* Reject any points already in the model */ 354a7e14dcfSSatish Balay reject = 0; 355a7e14dcfSSatish Balay for (j = 0; j < mfqP->n + 1; j++) { 356a7e14dcfSSatish Balay if (point == mfqP->model_indices[j]) { 357a7e14dcfSSatish Balay reject = 1; 358a7e14dcfSSatish Balay break; 359a7e14dcfSSatish Balay } 360a7e14dcfSSatish Balay } 361a7e14dcfSSatish Balay 362a7e14dcfSSatish Balay /* Reject if norm(d) >c2 */ 363a7e14dcfSSatish Balay if (!reject) { 3649566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[point], mfqP->workxvec)); 3659566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex])); 3669566063dSJacob Faibussowitsch PetscCall(VecNorm(mfqP->workxvec, NORM_2, &normd)); 367a7e14dcfSSatish Balay normd /= mfqP->delta; 368ad540459SPierre Jolivet if (normd > mfqP->c2) reject = 1; 369a7e14dcfSSatish Balay } 370f06e3bfaSBarry Smith if (reject) { 371a7e14dcfSSatish Balay point--; 372a7e14dcfSSatish Balay continue; 373a7e14dcfSSatish Balay } 374a7e14dcfSSatish Balay 3759566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[point], &x)); 376a7e14dcfSSatish Balay mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0; 377ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 3789566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[point], &x)); 3799566063dSJacob Faibussowitsch PetscCall(phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)])); 380a7e14dcfSSatish Balay 381a7e14dcfSSatish Balay /* Update QR factorization */ 382a7e14dcfSSatish Balay /* Copy M' to Q_tmp */ 383a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 384ad540459SPierre Jolivet for (j = 0; j < mfqP->npmax; j++) mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j]; 385a7e14dcfSSatish Balay } 386a7e14dcfSSatish Balay blasnp = mfqP->nmodelpoints + 1; 387a7e14dcfSSatish Balay /* Q_tmp,R = qr(M') */ 388a7e14dcfSSatish Balay blasmaxmn = PetscMax(mfqP->m, mfqP->n + 1); 389792fecdfSBarry Smith PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info)); 39063a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine geqrf returned with value %" PetscBLASInt_FMT, info); 391a7e14dcfSSatish Balay 392a7e14dcfSSatish Balay /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */ 393a7e14dcfSSatish Balay /* L = N*Qtmp */ 394a7e14dcfSSatish Balay blasint2 = mfqP->n * (mfqP->n + 1) / 2; 395a7e14dcfSSatish Balay /* Copy N to L_tmp */ 396ad540459SPierre Jolivet for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) mfqP->L_tmp[i] = mfqP->N[i]; 397a7e14dcfSSatish Balay /* Copy L_save to L_tmp */ 398a7e14dcfSSatish Balay 399a7e14dcfSSatish Balay /* L_tmp = N*Qtmp' */ 400792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info)); 40163a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 402a7e14dcfSSatish Balay 403a7e14dcfSSatish Balay /* Copy L_tmp to L_save */ 404ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L_save[i] = mfqP->L_tmp[i]; 405a7e14dcfSSatish Balay 406a7e14dcfSSatish Balay /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */ 407a7e14dcfSSatish Balay blasint = mfqP->nmodelpoints - mfqP->n; 4089566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 409792fecdfSBarry Smith PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info)); 4109566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 41163a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine gesvd returned with value %" PetscBLASInt_FMT, info); 412a7e14dcfSSatish Balay 413a7e14dcfSSatish Balay if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) { 414a7e14dcfSSatish Balay /* accept point */ 415a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = point; 416a7e14dcfSSatish Balay /* Copy Q_tmp to Q */ 417ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q[i] = mfqP->Q_tmp[i]; 418ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax; i++) mfqP->tau[i] = mfqP->tau_tmp[i]; 419a7e14dcfSSatish Balay mfqP->nmodelpoints++; 4201f4a0ff5SJason Sarich blasnp = mfqP->nmodelpoints; 421a7e14dcfSSatish Balay 422a7e14dcfSSatish Balay /* Copy L_save to L */ 423ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = mfqP->L_save[i]; 424a7e14dcfSSatish Balay } 425a7e14dcfSSatish Balay point--; 426a7e14dcfSSatish Balay } 427a7e14dcfSSatish Balay 428a7e14dcfSSatish Balay blasnp = mfqP->nmodelpoints; 429a7e14dcfSSatish Balay /* Copy Q(:,n+2:np) to Z */ 430a7e14dcfSSatish Balay /* First set Q_tmp to I */ 431ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q_tmp[i] = 0.0; 432ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax; i++) mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0; 433a7e14dcfSSatish Balay 434a7e14dcfSSatish Balay /* Q_tmp = I * Q */ 435792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 43663a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 437a7e14dcfSSatish Balay 438a7e14dcfSSatish Balay /* Copy Q_tmp(:,n+2:np) to Z) */ 439a7e14dcfSSatish Balay offset = mfqP->npmax * (mfqP->n + 1); 440ad540459SPierre Jolivet for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Z[i - offset] = mfqP->Q_tmp[i]; 441a7e14dcfSSatish Balay 442a7e14dcfSSatish Balay if (mfqP->nmodelpoints == mfqP->n + 1) { 443a7e14dcfSSatish Balay /* Set L to I_{n+1} */ 444ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = 0.0; 445ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0; 446a7e14dcfSSatish Balay } 4473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 448a7e14dcfSSatish Balay } 449a7e14dcfSSatish Balay 450a7e14dcfSSatish Balay /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */ 451d71ae5a4SJacob Faibussowitsch static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index) 452d71ae5a4SJacob Faibussowitsch { 453a7e14dcfSSatish Balay PetscFunctionBegin; 454a7e14dcfSSatish Balay /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/ 4559566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist])); 4569566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES)); 4579566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 4589566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 4599566063dSJacob Faibussowitsch PetscCall(VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex])); 460a7e14dcfSSatish Balay 461a7e14dcfSSatish Balay /* Project into feasible region */ 46248a46eb9SPierre Jolivet if (tao->XU && tao->XL) PetscCall(VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist])); 463a7e14dcfSSatish Balay 464a7e14dcfSSatish Balay /* Compute value of new vector */ 4659566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist])); 466a7e14dcfSSatish Balay CHKMEMQ; 4679566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 468a7e14dcfSSatish Balay 469a7e14dcfSSatish Balay /* Add new vector to model */ 470a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist; 471a7e14dcfSSatish Balay mfqP->nmodelpoints++; 472a7e14dcfSSatish Balay mfqP->nHist++; 4733ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 474a7e14dcfSSatish Balay } 475a7e14dcfSSatish Balay 476d71ae5a4SJacob Faibussowitsch static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints) 477d71ae5a4SJacob Faibussowitsch { 478a7e14dcfSSatish Balay /* modeld = Q(:,np+1:n)' */ 479a7e14dcfSSatish Balay PetscInt i, j, minindex = 0; 480e270355aSBarry Smith PetscReal dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY; 481a7e14dcfSSatish Balay PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blask, info; 482a7e14dcfSSatish Balay PetscBLASInt blas1 = 1, blasnmax = mfqP->nmax; 483f06e3bfaSBarry Smith 484362febeeSStefano Zampini PetscFunctionBegin; 485a7e14dcfSSatish Balay blask = mfqP->nmodelpoints; 486a7e14dcfSSatish Balay /* Qtmp = I(n x n) */ 487a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 488ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0; 489a7e14dcfSSatish Balay } 490ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0; 491a7e14dcfSSatish Balay 492a7e14dcfSSatish Balay /* Qtmp = Q * I */ 493792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 494a7e14dcfSSatish Balay 495a7e14dcfSSatish Balay for (i = mfqP->nmodelpoints; i < mfqP->n; i++) { 496792fecdfSBarry Smith PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1)); 497a7e14dcfSSatish Balay if (dp > 0.0) { /* Model says use the other direction! */ 498ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i * mfqP->npmax + j] *= -1; 499a7e14dcfSSatish Balay } 500a7e14dcfSSatish Balay /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */ 501ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->work2[j] = mfqP->Gres[j]; 502792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1)); 503792fecdfSBarry Smith PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1)); 504a7e14dcfSSatish Balay if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) { 505a7e14dcfSSatish Balay minindex = i; 506a7e14dcfSSatish Balay minvalue = mfqP->work[i]; 507a7e14dcfSSatish Balay } 50848a46eb9SPierre Jolivet if (addallpoints != 0) PetscCall(addpoint(tao, mfqP, i)); 509a7e14dcfSSatish Balay } 51048a46eb9SPierre Jolivet if (!addallpoints) PetscCall(addpoint(tao, mfqP, minindex)); 5113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 512a7e14dcfSSatish Balay } 513a7e14dcfSSatish Balay 514d71ae5a4SJacob Faibussowitsch static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c) 515d71ae5a4SJacob Faibussowitsch { 516a7e14dcfSSatish Balay PetscInt i, j; 517a7e14dcfSSatish Balay PetscBLASInt blasm = mfqP->m, blasj, blask, blasn = mfqP->n, ione = 1, info; 518a7e14dcfSSatish Balay PetscBLASInt blasnpmax = mfqP->npmax, blasmaxmn; 519a7e14dcfSSatish Balay PetscReal proj, normd; 5205e081366SBarry Smith const PetscReal *x; 521a7e14dcfSSatish Balay 522f06e3bfaSBarry Smith PetscFunctionBegin; 523a7e14dcfSSatish Balay for (i = mfqP->nHist - 1; i >= 0; i--) { 5249566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[i], &x)); 525ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta; 5269566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[i], &x)); 527792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione)); 528792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione)); 529a8bd9cb3STodd Munson if (normd <= c) { 53029c6e7daSBarry Smith blasj = PetscMax((mfqP->n - mfqP->nmodelpoints), 0); 531a7e14dcfSSatish Balay if (!mfqP->q_is_I) { 532a7e14dcfSSatish Balay /* project D onto null */ 533a7e14dcfSSatish Balay blask = (mfqP->nmodelpoints); 534792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info)); 53563a3b9bcSJacob Faibussowitsch PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "ormqr returned value %" PetscBLASInt_FMT, info); 536a7e14dcfSSatish Balay } 537792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione)); 538a7e14dcfSSatish Balay 539a7e14dcfSSatish Balay if (proj >= mfqP->theta1) { /* add this index to model */ 540a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = i; 541a7e14dcfSSatish Balay mfqP->nmodelpoints++; 542792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione)); 543a7e14dcfSSatish Balay blask = mfqP->npmax * (mfqP->nmodelpoints); 544792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione)); 545a7e14dcfSSatish Balay blask = mfqP->nmodelpoints; 546a7e14dcfSSatish Balay blasmaxmn = PetscMax(mfqP->m, mfqP->n); 547792fecdfSBarry Smith PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info)); 54863a3b9bcSJacob Faibussowitsch PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "geqrf returned value %" PetscBLASInt_FMT, info); 549a7e14dcfSSatish Balay mfqP->q_is_I = 0; 550a7e14dcfSSatish Balay } 551ad540459SPierre Jolivet if (mfqP->nmodelpoints == mfqP->n) break; 552a7e14dcfSSatish Balay } 553a7e14dcfSSatish Balay } 5543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 555a7e14dcfSSatish Balay } 556f06e3bfaSBarry Smith 557d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoSolve_POUNDERS(Tao tao) 558d71ae5a4SJacob Faibussowitsch { 559a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 5608931d482SJason Sarich PetscInt i, ii, j, k, l; 561a7e14dcfSSatish Balay PetscReal step = 1.0; 562a7e14dcfSSatish Balay PetscInt low, high; 563a7e14dcfSSatish Balay PetscReal minnorm; 5645e081366SBarry Smith PetscReal *x, *f; 5655e081366SBarry Smith const PetscReal *xmint, *fmin; 566bd026e97SJed Brown PetscReal deltaold; 567a7e14dcfSSatish Balay PetscReal gnorm; 568a7e14dcfSSatish Balay PetscBLASInt info, ione = 1, iblas; 569ff2b74efSJason Sarich PetscBool valid, same; 570a7e14dcfSSatish Balay PetscReal mdec, rho, normxsp; 571a7e14dcfSSatish Balay PetscReal one = 1.0, zero = 0.0, ratio; 572125ddc8aSJason Sarich PetscBLASInt blasm, blasn, blasncopy, blasnpmax; 573a7bbbda8SBarry Smith static PetscBool set = PETSC_FALSE; 574a7e14dcfSSatish Balay 575a7e14dcfSSatish Balay /* n = # of parameters 576a7e14dcfSSatish Balay m = dimension (components) of function */ 577a7e14dcfSSatish Balay PetscFunctionBegin; 5789566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister("@article{UNEDF0,\n" 579a7bbbda8SBarry Smith "title = {Nuclear energy density optimization},\n" 580a7bbbda8SBarry Smith "author = {Kortelainen, M. and Lesinski, T. and Mor\'e, J. and Nazarewicz, W.\n" 581a7bbbda8SBarry Smith " and Sarich, J. and Schunck, N. and Stoitsov, M. V. and Wild, S. },\n" 582a7bbbda8SBarry Smith "journal = {Phys. Rev. C},\n" 583a7bbbda8SBarry Smith "volume = {82},\n" 584a7bbbda8SBarry Smith "number = {2},\n" 585a7bbbda8SBarry Smith "pages = {024313},\n" 586a7bbbda8SBarry Smith "numpages = {18},\n" 587a7bbbda8SBarry Smith "year = {2010},\n" 588a7bbbda8SBarry Smith "month = {Aug},\n" 5899371c9d4SSatish Balay "doi = {10.1103/PhysRevC.82.024313}\n}\n", 5909371c9d4SSatish Balay &set)); 591e6d4cb7fSJason Sarich tao->niter = 0; 592a7e14dcfSSatish Balay if (tao->XL && tao->XU) { 593a7e14dcfSSatish Balay /* Check x0 <= XU */ 594a7e14dcfSSatish Balay PetscReal val; 595ce902467SBarry Smith 5969566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 5979566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 5989566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 5993c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 > upper bound"); 600a7e14dcfSSatish Balay 601a7e14dcfSSatish Balay /* Check x0 >= xl */ 6029566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XL, mfqP->Xhist[0])); 6039566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->solution)); 6049566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 6053c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 < lower bound"); 606f06e3bfaSBarry Smith 607a7e14dcfSSatish Balay /* Check x0 + delta < XU -- should be able to get around this eventually */ 608a7e14dcfSSatish Balay 6099566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->Xhist[0], mfqP->delta)); 6109566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], 1.0, tao->solution)); 6119566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 6129566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 6133c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 + delta > upper bound"); 614a7e14dcfSSatish Balay } 615a7e14dcfSSatish Balay 6169371c9d4SSatish Balay blasm = mfqP->m; 6179371c9d4SSatish Balay blasn = mfqP->n; 6189371c9d4SSatish Balay blasnpmax = mfqP->npmax; 619ce902467SBarry Smith for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0; 620a7e14dcfSSatish Balay 6219566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 622ce902467SBarry Smith 623ce902467SBarry Smith /* This provides enough information to approximate the gradient of the objective */ 624ce902467SBarry Smith /* using a forward difference scheme. */ 625ce902467SBarry Smith 6269566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta)); 6279566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0])); 628a7e14dcfSSatish Balay mfqP->minindex = 0; 629a7e14dcfSSatish Balay minnorm = mfqP->Fres[0]; 630ff2b74efSJason Sarich 6319566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(mfqP->Xhist[0], &low, &high)); 632805332fbSTodd Munson for (i = 1; i < mfqP->n + 1; ++i) { 6339566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[0], mfqP->Xhist[i])); 634ce902467SBarry Smith 635a7e14dcfSSatish Balay if (i - 1 >= low && i - 1 < high) { 6369566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 637a7e14dcfSSatish Balay x[i - 1 - low] += mfqP->delta; 6389566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 639a7e14dcfSSatish Balay } 640a7e14dcfSSatish Balay CHKMEMQ; 6419566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i])); 642a7e14dcfSSatish Balay if (mfqP->Fres[i] < minnorm) { 643a7e14dcfSSatish Balay mfqP->minindex = i; 644a7e14dcfSSatish Balay minnorm = mfqP->Fres[i]; 645a7e14dcfSSatish Balay } 646a7e14dcfSSatish Balay } 6479566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 6489566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 6499566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm)); 650ce902467SBarry Smith 651a7e14dcfSSatish Balay /* Gather mpi vecs to one big local vec */ 652a7e14dcfSSatish Balay 653a7e14dcfSSatish Balay /* Begin serial code */ 654a7e14dcfSSatish Balay 655a7e14dcfSSatish Balay /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 656a7e14dcfSSatish Balay /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 657a7e14dcfSSatish Balay /* (Column oriented for blas calls) */ 658a7e14dcfSSatish Balay ii = 0; 659a7e14dcfSSatish Balay 66063a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Build matrix: %" PetscInt_FMT "\n", (PetscInt)mfqP->size)); 661805332fbSTodd Munson if (1 == mfqP->size) { 6629566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 663a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 6649566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 6659566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 666a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 667a7e14dcfSSatish Balay if (i == mfqP->minindex) continue; 668a7e14dcfSSatish Balay 6699566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 670ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 6719566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 672a7e14dcfSSatish Balay 6739566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Fhist[i], &f)); 674ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; 6759566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Fhist[i], &f)); 67694e8dccaSTodd Munson 677a7e14dcfSSatish Balay mfqP->model_indices[ii++] = i; 678a7e14dcfSSatish Balay } 679ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j]; 6809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 681a7e14dcfSSatish Balay } else { 6829566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->localxmin, 0)); 6839566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 6849566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 685ce902467SBarry Smith 6869566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->localxmin, &xmint)); 687a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 6889566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->localxmin, &xmint)); 689a7e14dcfSSatish Balay 6909566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 6919566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 6929566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->localfmin, &fmin)); 693a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 694a7e14dcfSSatish Balay if (i == mfqP->minindex) continue; 695a7e14dcfSSatish Balay 6969566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 6979566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 6989566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->localx, &x)); 699ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 7009566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->localx, &x)); 701a7e14dcfSSatish Balay 7029566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 7039566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 7049566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->localf, &f)); 705ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; 7069566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->localf, &f)); 70794e8dccaSTodd Munson 70894e8dccaSTodd Munson mfqP->model_indices[ii++] = i; 709a7e14dcfSSatish Balay } 710ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j]; 7119566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->localfmin, &fmin)); 712a7e14dcfSSatish Balay } 713a7e14dcfSSatish Balay 714a7e14dcfSSatish Balay /* Determine the initial quadratic models */ 715a7e14dcfSSatish Balay /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */ 716a7e14dcfSSatish Balay /* D (nxn) Fdiff (nxm) => G (nxm) */ 717125ddc8aSJason Sarich blasncopy = blasn; 718792fecdfSBarry Smith PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info)); 71963a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Linear solve return: %" PetscInt_FMT "\n", (PetscInt)info)); 720a7e14dcfSSatish Balay 7219566063dSJacob Faibussowitsch PetscCall(pounders_update_res(tao)); 722a7e14dcfSSatish Balay 723a7e14dcfSSatish Balay valid = PETSC_TRUE; 724a7e14dcfSSatish Balay 7259566063dSJacob Faibussowitsch PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 7269566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(tao->gradient)); 7279566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(tao->gradient)); 7289566063dSJacob Faibussowitsch PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 729a7e14dcfSSatish Balay gnorm *= mfqP->delta; 7309566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 731763847b4SAlp Dener 732763847b4SAlp Dener tao->reason = TAO_CONTINUE_ITERATING; 7339566063dSJacob Faibussowitsch PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 7349566063dSJacob Faibussowitsch PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 735dbbe0bcdSBarry Smith PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 736763847b4SAlp Dener 737a7e14dcfSSatish Balay mfqP->nHist = mfqP->n + 1; 738a7e14dcfSSatish Balay mfqP->nmodelpoints = mfqP->n + 1; 7399566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm)); 740a7e14dcfSSatish Balay 741763847b4SAlp Dener while (tao->reason == TAO_CONTINUE_ITERATING) { 742a7e14dcfSSatish Balay PetscReal gnm = 1e-4; 743e1e80dc8SAlp Dener /* Call general purpose update function */ 744dbbe0bcdSBarry Smith PetscTryTypeMethod(tao, update, tao->niter, tao->user_update); 7458931d482SJason Sarich tao->niter++; 746ce902467SBarry Smith /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */ 7479566063dSJacob Faibussowitsch PetscCall(gqtwrap(tao, &gnm, &mdec)); 748a7e14dcfSSatish Balay /* Evaluate the function at the new point */ 749a7e14dcfSSatish Balay 750ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i]; 7519566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist])); 7529566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist])); 7539566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES)); 7549566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 7559566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 756a7e14dcfSSatish Balay 7579566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 758125ddc8aSJason Sarich 759a7e14dcfSSatish Balay rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec; 760a7e14dcfSSatish Balay mfqP->nHist++; 761a7e14dcfSSatish Balay 762a7e14dcfSSatish Balay /* Update the center */ 763a7e14dcfSSatish Balay if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) { 764a7e14dcfSSatish Balay /* Update model to reflect new base point */ 765ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta; 766a7e14dcfSSatish Balay for (j = 0; j < mfqP->m; j++) { 767a7e14dcfSSatish Balay /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work'; 768a7e14dcfSSatish Balay G(:,j) = G(:,j) + H(:,:,j)*work' */ 769a7e14dcfSSatish Balay for (k = 0; k < mfqP->n; k++) { 770a7e14dcfSSatish Balay mfqP->work2[k] = 0.0; 771ad540459SPierre Jolivet for (l = 0; l < mfqP->n; l++) mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l]; 772a7e14dcfSSatish Balay } 773a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 774a7e14dcfSSatish Balay mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]); 775a7e14dcfSSatish Balay mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i]; 776a7e14dcfSSatish Balay } 777a7e14dcfSSatish Balay } 778a7e14dcfSSatish Balay /* Cres += work*Gres + .5*work*Hres*work'; 779a7e14dcfSSatish Balay Gres += Hres*work'; */ 780a7e14dcfSSatish Balay 781792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione)); 782ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] += mfqP->work2[i]; 783a7e14dcfSSatish Balay mfqP->minindex = mfqP->nHist - 1; 784a7e14dcfSSatish Balay minnorm = mfqP->Fres[mfqP->minindex]; 7859566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 786a7e14dcfSSatish Balay /* Change current center */ 7879566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 788ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 7899566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 790a7e14dcfSSatish Balay } 791a7e14dcfSSatish Balay 792a7e14dcfSSatish Balay /* Evaluate at a model-improving point if necessary */ 793a7e14dcfSSatish Balay if (valid == PETSC_FALSE) { 794a7e14dcfSSatish Balay mfqP->q_is_I = 1; 7958a35fac9SJason Sarich mfqP->nmodelpoints = 0; 7969566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 797a7e14dcfSSatish Balay if (mfqP->nmodelpoints < mfqP->n) { 7989566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Model not valid -- model-improving\n")); 7999566063dSJacob Faibussowitsch PetscCall(modelimprove(tao, mfqP, 1)); 800a7e14dcfSSatish Balay } 801a7e14dcfSSatish Balay } 802a7e14dcfSSatish Balay 803a7e14dcfSSatish Balay /* Update the trust region radius */ 804a7e14dcfSSatish Balay deltaold = mfqP->delta; 805a7e14dcfSSatish Balay normxsp = 0; 806ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i]; 807a7e14dcfSSatish Balay normxsp = PetscSqrtReal(normxsp); 808a7e14dcfSSatish Balay if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) { 809a7e14dcfSSatish Balay mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax); 810a7e14dcfSSatish Balay } else if (valid == PETSC_TRUE) { 811a7e14dcfSSatish Balay mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin); 812a7e14dcfSSatish Balay } 813a7e14dcfSSatish Balay 814a7e14dcfSSatish Balay /* Compute the next interpolation set */ 815a7e14dcfSSatish Balay mfqP->q_is_I = 1; 816a7e14dcfSSatish Balay mfqP->nmodelpoints = 0; 8179566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1)); 8189566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 819a7e14dcfSSatish Balay if (mfqP->nmodelpoints == mfqP->n) { 820a7e14dcfSSatish Balay valid = PETSC_TRUE; 821a7e14dcfSSatish Balay } else { 822a7e14dcfSSatish Balay valid = PETSC_FALSE; 8239566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2)); 8249566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c2)); 825a7e14dcfSSatish Balay if (mfqP->n > mfqP->nmodelpoints) { 8269566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Model not valid -- adding geometry points\n")); 8279566063dSJacob Faibussowitsch PetscCall(modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints)); 828a7e14dcfSSatish Balay } 829a7e14dcfSSatish Balay } 830ad540459SPierre Jolivet for (i = mfqP->nmodelpoints; i > 0; i--) mfqP->model_indices[i] = mfqP->model_indices[i - 1]; 831a7e14dcfSSatish Balay mfqP->nmodelpoints++; 832a7e14dcfSSatish Balay mfqP->model_indices[0] = mfqP->minindex; 8339566063dSJacob Faibussowitsch PetscCall(morepoints(mfqP)); 834a7e14dcfSSatish Balay for (i = 0; i < mfqP->nmodelpoints; i++) { 8359566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 836ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold; 8379566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 8389566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 839a7e14dcfSSatish Balay for (j = 0; j < mfqP->m; j++) { 840a7e14dcfSSatish Balay for (k = 0; k < mfqP->n; k++) { 841a7e14dcfSSatish Balay mfqP->work[k] = 0.0; 842ad540459SPierre Jolivet for (l = 0; l < mfqP->n; l++) mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l]; 843a7e14dcfSSatish Balay } 844792fecdfSBarry Smith PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]); 845a7e14dcfSSatish Balay } 8469566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 847a7e14dcfSSatish Balay } 848a7e14dcfSSatish Balay 849a7e14dcfSSatish Balay /* Update the quadratic model */ 85063a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints)); 8519566063dSJacob Faibussowitsch PetscCall(getquadpounders(mfqP)); 8529566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 853792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione)); 854a7e14dcfSSatish Balay /* G = G*(delta/deltaold) + Gdel */ 855a7e14dcfSSatish Balay ratio = mfqP->delta / deltaold; 856a7e14dcfSSatish Balay iblas = blasm * blasn; 857792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione)); 858792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione)); 859125ddc8aSJason Sarich /* H = H*(delta/deltaold)^2 + Hdel */ 860a7e14dcfSSatish Balay iblas = blasm * blasn * blasn; 861a7e14dcfSSatish Balay ratio *= ratio; 862792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione)); 863792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione)); 864a7e14dcfSSatish Balay 865a7e14dcfSSatish Balay /* Get residuals */ 8669566063dSJacob Faibussowitsch PetscCall(pounders_update_res(tao)); 867a7e14dcfSSatish Balay 868a7e14dcfSSatish Balay /* Export solution and gradient residual to TAO */ 8699566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 8709566063dSJacob Faibussowitsch PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 8719566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(tao->gradient)); 8729566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(tao->gradient)); 8739566063dSJacob Faibussowitsch PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 874a7e14dcfSSatish Balay gnorm *= mfqP->delta; 875a7e14dcfSSatish Balay /* final criticality test */ 8769566063dSJacob Faibussowitsch PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 8779566063dSJacob Faibussowitsch PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 878dbbe0bcdSBarry Smith PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 879ff2b74efSJason Sarich /* test for repeated model */ 880c9376581SJason Sarich if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) { 881c9376581SJason Sarich same = PETSC_TRUE; 882c9376581SJason Sarich } else { 883c9376581SJason Sarich same = PETSC_FALSE; 884c9376581SJason Sarich } 885ff2b74efSJason Sarich for (i = 0; i < mfqP->nmodelpoints; i++) { 886c9376581SJason Sarich if (same) { 887c9376581SJason Sarich if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) { 888c9376581SJason Sarich same = PETSC_TRUE; 889c9376581SJason Sarich } else { 890c9376581SJason Sarich same = PETSC_FALSE; 891c9376581SJason Sarich } 892c9376581SJason Sarich } 893ff2b74efSJason Sarich mfqP->last_model_indices[i] = mfqP->model_indices[i]; 894ff2b74efSJason Sarich } 895ff2b74efSJason Sarich mfqP->last_nmodelpoints = mfqP->nmodelpoints; 896ff2b74efSJason Sarich if (same && mfqP->delta == deltaold) { 8979566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Identical model used in successive iterations\n")); 8985d72676cSJason Sarich tao->reason = TAO_CONVERGED_STEPTOL; 899ff2b74efSJason Sarich } 900a7e14dcfSSatish Balay } 9013ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 902a7e14dcfSSatish Balay } 903a7e14dcfSSatish Balay 904d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoSetUp_POUNDERS(Tao tao) 905d71ae5a4SJacob Faibussowitsch { 906a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 907125ddc8aSJason Sarich PetscInt i, j; 908a7e14dcfSSatish Balay IS isfloc, isfglob, isxloc, isxglob; 909a7e14dcfSSatish Balay 910a7e14dcfSSatish Balay PetscFunctionBegin; 9119566063dSJacob Faibussowitsch if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient)); 9129566063dSJacob Faibussowitsch if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection)); 9139566063dSJacob Faibussowitsch PetscCall(VecGetSize(tao->solution, &mfqP->n)); 9149566063dSJacob Faibussowitsch PetscCall(VecGetSize(tao->ls_res, &mfqP->m)); 915a7e14dcfSSatish Balay mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n); 916*606f75f6SBarry Smith if (mfqP->npmax == PETSC_CURRENT) mfqP->npmax = 2 * mfqP->n + 1; 917a7e14dcfSSatish Balay mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax); 918a7e14dcfSSatish Balay mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2); 919a7e14dcfSSatish Balay 9209566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist)); 9219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist)); 922a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 9239566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[i])); 9249566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[i])); 925a7e14dcfSSatish Balay } 9269566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->workxvec)); 9279566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->workfvec)); 928a7e14dcfSSatish Balay mfqP->nHist = 0; 929a7e14dcfSSatish Balay 9309566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres)); 9319566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES)); 9329566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work)); 9339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work2)); 9349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work3)); 9359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork)); 9369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega)); 9379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta)); 9389566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n + 1, &mfqP->alpha)); 939a7e14dcfSSatish Balay 9409566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H)); 9419566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q)); 9429566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp)); 9439566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L)); 9449566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp)); 9459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save)); 9469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N)); 9479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M)); 9489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z)); 9499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau)); 9509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp)); 951a7e14dcfSSatish Balay mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2); 9529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork)); 9539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork)); 9549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->xmin)); 9559566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m, &mfqP->C)); 9569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff)); 9579566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp)); 9589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->Gres)); 9599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres)); 9609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints)); 9619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->model_indices)); 9629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices)); 9639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->Xsubproblem)); 9649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel)); 9659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel)); 9669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices)); 9679566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->iwork)); 9689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w)); 969125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 970125ddc8aSJason Sarich for (j = 0; j < mfqP->m; j++) { 971125ddc8aSJason Sarich if (i == j) { 972125ddc8aSJason Sarich mfqP->w[i + mfqP->m * j] = 1.0; 973125ddc8aSJason Sarich } else { 974125ddc8aSJason Sarich mfqP->w[i + mfqP->m * j] = 0.0; 975125ddc8aSJason Sarich } 976125ddc8aSJason Sarich } 977125ddc8aSJason Sarich } 978ad540459SPierre Jolivet for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) mfqP->indices[i] = i; 9799566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size)); 980f40bd260SBarry Smith if (mfqP->size > 1) { 9819566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx)); 9829566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin)); 9839566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf)); 9849566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin)); 9859566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc)); 9869566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob)); 9879566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc)); 9889566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob)); 989a7e14dcfSSatish Balay 9909566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx)); 9919566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf)); 992a7e14dcfSSatish Balay 9939566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isxloc)); 9949566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isxglob)); 9959566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isfloc)); 9969566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isfglob)); 997a7e14dcfSSatish Balay } 998a7e14dcfSSatish Balay 999a7e14dcfSSatish Balay if (!mfqP->usegqt) { 1000a7e14dcfSSatish Balay KSP ksp; 1001a7e14dcfSSatish Balay PC pc; 10029566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx)); 10039566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl)); 10049566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subb)); 10059566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subxu)); 10069566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subpdel)); 10079566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subndel)); 10089566063dSJacob Faibussowitsch PetscCall(TaoCreate(PETSC_COMM_SELF, &mfqP->subtao)); 10099566063dSJacob Faibussowitsch PetscCall(PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1)); 10109566063dSJacob Faibussowitsch PetscCall(TaoSetType(mfqP->subtao, TAOBNTR)); 10119566063dSJacob Faibussowitsch PetscCall(TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_")); 10129566063dSJacob Faibussowitsch PetscCall(TaoSetSolution(mfqP->subtao, mfqP->subx)); 10139566063dSJacob Faibussowitsch PetscCall(TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP)); 10149566063dSJacob Faibussowitsch PetscCall(TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits)); 10159566063dSJacob Faibussowitsch PetscCall(TaoSetFromOptions(mfqP->subtao)); 10169566063dSJacob Faibussowitsch PetscCall(TaoGetKSP(mfqP->subtao, &ksp)); 1017a7e14dcfSSatish Balay if (ksp) { 10189566063dSJacob Faibussowitsch PetscCall(KSPGetPC(ksp, &pc)); 10199566063dSJacob Faibussowitsch PetscCall(PCSetType(pc, PCNONE)); 1020a7e14dcfSSatish Balay } 10219566063dSJacob Faibussowitsch PetscCall(TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu)); 10229566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH)); 10239566063dSJacob Faibussowitsch PetscCall(TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP)); 1024a7e14dcfSSatish Balay } 10253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1026a7e14dcfSSatish Balay } 1027a7e14dcfSSatish Balay 1028d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoDestroy_POUNDERS(Tao tao) 1029d71ae5a4SJacob Faibussowitsch { 1030a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1031a7e14dcfSSatish Balay PetscInt i; 1032a7e14dcfSSatish Balay 1033a7e14dcfSSatish Balay PetscFunctionBegin; 1034a7e14dcfSSatish Balay if (!mfqP->usegqt) { 10359566063dSJacob Faibussowitsch PetscCall(TaoDestroy(&mfqP->subtao)); 10369566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subx)); 10379566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subxl)); 10389566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subxu)); 10399566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subb)); 10409566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subpdel)); 10419566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subndel)); 10429566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mfqP->subH)); 1043a7e14dcfSSatish Balay } 10449566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fres)); 10459566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->RES)); 10469566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work)); 10479566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work2)); 10489566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work3)); 10499566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->mwork)); 10509566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->omega)); 10519566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->beta)); 10529566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->alpha)); 10539566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->H)); 10549566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Q)); 10559566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Q_tmp)); 10569566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L)); 10579566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L_tmp)); 10589566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L_save)); 10599566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->N)); 10609566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->M)); 10619566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Z)); 10629566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->tau)); 10639566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->tau_tmp)); 10649566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->npmaxwork)); 10659566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->npmaxiwork)); 10669566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->xmin)); 10679566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->C)); 10689566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fdiff)); 10699566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Disp)); 10709566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gres)); 10719566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Hres)); 10729566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gpoints)); 10739566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->model_indices)); 10749566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->last_model_indices)); 10759566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Xsubproblem)); 10769566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gdel)); 10779566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Hdel)); 10789566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->indices)); 10799566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->iwork)); 10809566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->w)); 1081a7e14dcfSSatish Balay for (i = 0; i < mfqP->nHist; i++) { 10829566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->Xhist[i])); 10839566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->Fhist[i])); 1084a7e14dcfSSatish Balay } 10859566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->workxvec)); 10869566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->workfvec)); 10879566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Xhist)); 10889566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fhist)); 1089a7e14dcfSSatish Balay 1090f40bd260SBarry Smith if (mfqP->size > 1) { 10919566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localx)); 10929566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localxmin)); 10939566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localf)); 10949566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localfmin)); 1095a7e14dcfSSatish Balay } 10969566063dSJacob Faibussowitsch PetscCall(PetscFree(tao->data)); 10973ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1098a7e14dcfSSatish Balay } 1099a7e14dcfSSatish Balay 1100d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems *PetscOptionsObject) 1101d71ae5a4SJacob Faibussowitsch { 1102a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1103a7e14dcfSSatish Balay 1104a7e14dcfSSatish Balay PetscFunctionBegin; 1105d0609cedSBarry Smith PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization"); 11069566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL)); 1107ff2b74efSJason Sarich mfqP->delta = mfqP->delta0; 11089566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL)); 11099566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL)); 1110d0609cedSBarry Smith PetscOptionsHeadEnd(); 11113ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1112a7e14dcfSSatish Balay } 1113a7e14dcfSSatish Balay 1114d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer) 1115d71ae5a4SJacob Faibussowitsch { 1116a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1117a7e14dcfSSatish Balay PetscBool isascii; 1118a7e14dcfSSatish Balay 1119a7e14dcfSSatish Balay PetscFunctionBegin; 11209566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1121a7e14dcfSSatish Balay if (isascii) { 11229566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0)); 11239566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta)); 112463a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints)); 1125a7e14dcfSSatish Balay if (mfqP->usegqt) { 11269566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n")); 1127a7e14dcfSSatish Balay } else { 11289566063dSJacob Faibussowitsch PetscCall(TaoView(mfqP->subtao, viewer)); 1129a7e14dcfSSatish Balay } 1130a7e14dcfSSatish Balay } 11313ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1132a7e14dcfSSatish Balay } 11331eb8069cSJason Sarich /*MC 11341eb8069cSJason Sarich TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares 11351eb8069cSJason Sarich 11361eb8069cSJason Sarich Options Database Keys: 11371eb8069cSJason Sarich + -tao_pounders_delta - initial step length 11381eb8069cSJason Sarich . -tao_pounders_npmax - maximum number of points in model 11391eb8069cSJason Sarich - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON 11401eb8069cSJason Sarich 11411eb8069cSJason Sarich Level: beginner 11421eb8069cSJason Sarich 11431eb8069cSJason Sarich M*/ 1144a7e14dcfSSatish Balay 1145d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao) 1146d71ae5a4SJacob Faibussowitsch { 1147a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1148a7e14dcfSSatish Balay 1149a7e14dcfSSatish Balay PetscFunctionBegin; 1150a7e14dcfSSatish Balay tao->ops->setup = TaoSetUp_POUNDERS; 1151a7e14dcfSSatish Balay tao->ops->solve = TaoSolve_POUNDERS; 1152a7e14dcfSSatish Balay tao->ops->view = TaoView_POUNDERS; 1153a7e14dcfSSatish Balay tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS; 1154a7e14dcfSSatish Balay tao->ops->destroy = TaoDestroy_POUNDERS; 1155a7e14dcfSSatish Balay 11564dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mfqP)); 1157a7e14dcfSSatish Balay tao->data = (void *)mfqP; 1158*606f75f6SBarry Smith 11596552cf8aSJason Sarich /* Override default settings (unless already changed) */ 1160*606f75f6SBarry Smith PetscCall(TaoParametersInitialize(tao)); 1161*606f75f6SBarry Smith PetscObjectParameterSetDefault(tao, max_it, 2000); 1162*606f75f6SBarry Smith PetscObjectParameterSetDefault(tao, max_funcs, 4000); 1163*606f75f6SBarry Smith 1164*606f75f6SBarry Smith mfqP->npmax = PETSC_CURRENT; 1165ff2b74efSJason Sarich mfqP->delta0 = 0.1; 1166a7e14dcfSSatish Balay mfqP->delta = 0.1; 1167a7e14dcfSSatish Balay mfqP->deltamax = 1e3; 1168125ddc8aSJason Sarich mfqP->deltamin = 1e-6; 1169a8bd9cb3STodd Munson mfqP->c2 = 10.0; 1170a7e14dcfSSatish Balay mfqP->theta1 = 1e-5; 1171a7e14dcfSSatish Balay mfqP->theta2 = 1e-4; 1172a7e14dcfSSatish Balay mfqP->gamma0 = 0.5; 1173a7e14dcfSSatish Balay mfqP->gamma1 = 2.0; 1174a7e14dcfSSatish Balay mfqP->eta0 = 0.0; 1175a7e14dcfSSatish Balay mfqP->eta1 = 0.1; 1176f553c655STodd Munson mfqP->usegqt = PETSC_FALSE; 1177a7e14dcfSSatish Balay mfqP->gqt_rtol = 0.001; 1178a7e14dcfSSatish Balay mfqP->gqt_maxits = 50; 117983c8fe1dSLisandro Dalcin mfqP->workxvec = NULL; 11803ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1181a7e14dcfSSatish Balay } 1182