1aaa7dc30SBarry Smith #include <../src/tao/leastsquares/impls/pounders/pounders.h> 2a7e14dcfSSatish Balay 3*2a8381b2SBarry Smith static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, PetscCtx ctx) 4d71ae5a4SJacob Faibussowitsch { 5a7e14dcfSSatish Balay PetscFunctionBegin; 63ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 7a7e14dcfSSatish Balay } 8ca88a2e0STodd Munson 9*2a8381b2SBarry Smith static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, PetscCtx ctx) 10d71ae5a4SJacob Faibussowitsch { 11a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx; 12a7e14dcfSSatish Balay PetscReal d1, d2; 13f06e3bfaSBarry Smith 14a7e14dcfSSatish Balay PetscFunctionBegin; 15a7e14dcfSSatish Balay /* g = A*x (add b later)*/ 169566063dSJacob Faibussowitsch PetscCall(MatMult(mfqP->subH, x, g)); 17a7e14dcfSSatish Balay 18a7e14dcfSSatish Balay /* f = 1/2 * x'*(Ax) + b'*x */ 199566063dSJacob Faibussowitsch PetscCall(VecDot(x, g, &d1)); 209566063dSJacob Faibussowitsch PetscCall(VecDot(mfqP->subb, x, &d2)); 21a7e14dcfSSatish Balay *f = 0.5 * d1 + d2; 22a7e14dcfSSatish Balay 23a7e14dcfSSatish Balay /* now g = g + b */ 249566063dSJacob Faibussowitsch PetscCall(VecAXPY(g, 1.0, mfqP->subb)); 253ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 26a7e14dcfSSatish Balay } 27ca88a2e0STodd Munson 28d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum) 29d71ae5a4SJacob Faibussowitsch { 30125ddc8aSJason Sarich TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 31125ddc8aSJason Sarich PetscInt i, row, col; 32125ddc8aSJason Sarich PetscReal fr, fc; 33691b26d3SBarry Smith 34125ddc8aSJason Sarich PetscFunctionBegin; 359566063dSJacob Faibussowitsch PetscCall(TaoComputeResidual(tao, x, F)); 36737f463aSAlp Dener if (tao->res_weights_v) { 379566063dSJacob Faibussowitsch PetscCall(VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F)); 389566063dSJacob Faibussowitsch PetscCall(VecDot(mfqP->workfvec, mfqP->workfvec, fsum)); 39737f463aSAlp Dener } else if (tao->res_weights_w) { 40125ddc8aSJason Sarich *fsum = 0; 41737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 42737f463aSAlp Dener row = tao->res_weights_rows[i]; 43737f463aSAlp Dener col = tao->res_weights_cols[i]; 449566063dSJacob Faibussowitsch PetscCall(VecGetValues(F, 1, &row, &fr)); 459566063dSJacob Faibussowitsch PetscCall(VecGetValues(F, 1, &col, &fc)); 46737f463aSAlp Dener *fsum += tao->res_weights_w[i] * fc * fr; 47125ddc8aSJason Sarich } 48125ddc8aSJason Sarich } else { 499566063dSJacob Faibussowitsch PetscCall(VecDot(F, F, fsum)); 50125ddc8aSJason Sarich } 519566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum)); 5276c63389SBarry Smith PetscCheck(!PetscIsInfOrNanReal(*fsum), PETSC_COMM_SELF, PETSC_ERR_USER, "User provided compute function generated infinity or NaN"); 533ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 54125ddc8aSJason Sarich } 55f06e3bfaSBarry Smith 56d71ae5a4SJacob Faibussowitsch static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin) 57d71ae5a4SJacob Faibussowitsch { 586f4723b1SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 596f4723b1SBarry Smith PetscReal atol = 1.0e-5; 606f4723b1SBarry Smith #else 61a7e14dcfSSatish Balay PetscReal atol = 1.0e-10; 626f4723b1SBarry Smith #endif 63a7e14dcfSSatish Balay PetscInt info, its; 64a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 65a7e14dcfSSatish Balay 66a7e14dcfSSatish Balay PetscFunctionBegin; 67a7e14dcfSSatish Balay if (!mfqP->usegqt) { 68a7e14dcfSSatish Balay PetscReal maxval; 69a7e14dcfSSatish Balay PetscInt i, j; 70a7e14dcfSSatish Balay 719566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 729566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->subb)); 739566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->subb)); 74a7e14dcfSSatish Balay 759566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subx, 0.0)); 76a7e14dcfSSatish Balay 779566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subndel, -1.0)); 789566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->subpdel, +1.0)); 79a7e14dcfSSatish Balay 80ce902467SBarry Smith /* Complete the lower triangle of the Hessian matrix */ 81a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 82ad540459SPierre Jolivet for (j = i + 1; j < mfqP->n; j++) mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i]; 83a7e14dcfSSatish Balay } 849566063dSJacob Faibussowitsch PetscCall(MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES)); 859566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY)); 869566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY)); 87a7e14dcfSSatish Balay 889566063dSJacob Faibussowitsch PetscCall(TaoResetStatistics(mfqP->subtao)); 89fb842aefSJose E. Roman /* PetscCall(TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_CURRENT)); */ 90a7e14dcfSSatish Balay /* enforce bound constraints -- experimental */ 91a7e14dcfSSatish Balay if (tao->XU && tao->XL) { 929566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XU, mfqP->subxu)); 939566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subxu, -1.0, tao->solution)); 949566063dSJacob Faibussowitsch PetscCall(VecScale(mfqP->subxu, 1.0 / mfqP->delta)); 959566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XL, mfqP->subxl)); 969566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subxl, -1.0, tao->solution)); 979566063dSJacob Faibussowitsch PetscCall(VecScale(mfqP->subxl, 1.0 / mfqP->delta)); 98a7e14dcfSSatish Balay 999566063dSJacob Faibussowitsch PetscCall(VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel)); 1009566063dSJacob Faibussowitsch PetscCall(VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel)); 101a7e14dcfSSatish Balay } else { 1029566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subpdel, mfqP->subxu)); 1039566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subndel, mfqP->subxl)); 104a7e14dcfSSatish Balay } 105a7e14dcfSSatish Balay /* Make sure xu > xl */ 1069566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1079566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1089566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1093c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "upper bound < lower bound in subproblem"); 110a7e14dcfSSatish Balay /* Make sure xu > tao->solution > xl */ 1119566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1129566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 1139566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1143c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess < lower bound in subproblem"); 115a7e14dcfSSatish Balay 1169566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 1179566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1189566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 1193c859ba3SBarry Smith PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess > upper bound in subproblem"); 120a7e14dcfSSatish Balay 1219566063dSJacob Faibussowitsch PetscCall(TaoSolve(mfqP->subtao)); 1229566063dSJacob Faibussowitsch PetscCall(TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL)); 123a7e14dcfSSatish Balay 124a7e14dcfSSatish Balay /* test bounds post-solution*/ 1259566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 1269566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 1279566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 128a7e14dcfSSatish Balay if (maxval > 1e-5) { 1299566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "subproblem solution < lower bound\n")); 130a7e14dcfSSatish Balay tao->reason = TAO_DIVERGED_TR_REDUCTION; 131a7e14dcfSSatish Balay } 132a7e14dcfSSatish Balay 1339566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 1349566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 1359566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 136a7e14dcfSSatish Balay if (maxval > 1e-5) { 1379566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "subproblem solution > upper bound\n")); 138a7e14dcfSSatish Balay tao->reason = TAO_DIVERGED_TR_REDUCTION; 139a7e14dcfSSatish Balay } 140a7e14dcfSSatish Balay } else { 1413ba16761SJacob Faibussowitsch PetscCall(gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3)); 142a7e14dcfSSatish Balay } 143a7e14dcfSSatish Balay *qmin *= -1; 1443ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 145a7e14dcfSSatish Balay } 146a7e14dcfSSatish Balay 147d71ae5a4SJacob Faibussowitsch static PetscErrorCode pounders_update_res(Tao tao) 148d71ae5a4SJacob Faibussowitsch { 149125ddc8aSJason Sarich TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 150125ddc8aSJason Sarich PetscInt i, row, col; 1516497c311SBarry Smith PetscBLASInt blasn, blasn2, blasm, ione = 1; 1526c95174dSJason Sarich PetscReal zero = 0.0, one = 1.0, wii, factor; 153125ddc8aSJason Sarich 154691b26d3SBarry Smith PetscFunctionBegin; 1556497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->n, &blasn)); 1566497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->m, &blasm)); 1576497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->n * mfqP->n, &blasn2)); 158ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] = 0; 159ad540459SPierre Jolivet for (i = 0; i < mfqP->n * mfqP->n; i++) mfqP->Hres[i] = 0; 160125ddc8aSJason Sarich 161125ddc8aSJason Sarich /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */ 162737f463aSAlp Dener if (tao->res_weights_v) { 1636c95174dSJason Sarich /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */ 164125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 1659566063dSJacob Faibussowitsch PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &factor)); 1666c95174dSJason Sarich factor = factor * mfqP->C[i]; 167792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione)); 168125ddc8aSJason Sarich } 169125ddc8aSJason Sarich 170125ddc8aSJason Sarich /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 1716c95174dSJason Sarich /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/ 172125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 1739566063dSJacob Faibussowitsch PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &wii)); 174125ddc8aSJason Sarich if (tao->niter > 1) { 175125ddc8aSJason Sarich factor = wii * mfqP->C[i]; 1766c95174dSJason Sarich /* add wii * ci * Hi */ 177792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 178125ddc8aSJason Sarich } 1796c95174dSJason Sarich /* add wii * gi * gi' */ 180792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn)); 181125ddc8aSJason Sarich } 182737f463aSAlp Dener } else if (tao->res_weights_w) { 183ce902467SBarry Smith /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */ 184737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 185737f463aSAlp Dener row = tao->res_weights_rows[i]; 186737f463aSAlp Dener col = tao->res_weights_cols[i]; 187ce902467SBarry Smith 188737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 189792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione)); 190737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 191792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione)); 192ce902467SBarry Smith } 193ce902467SBarry Smith 194ce902467SBarry Smith /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 1956c95174dSJason Sarich /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 196737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 197737f463aSAlp Dener row = tao->res_weights_rows[i]; 198737f463aSAlp Dener col = tao->res_weights_cols[i]; 199737f463aSAlp Dener factor = tao->res_weights_w[i] / 2.0; 200125ddc8aSJason Sarich /* add wij * gi gj' + wij * gj gi' */ 201792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn)); 202792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn)); 203125ddc8aSJason Sarich } 204125ddc8aSJason Sarich if (tao->niter > 1) { 205737f463aSAlp Dener for (i = 0; i < tao->res_weights_n; i++) { 206737f463aSAlp Dener row = tao->res_weights_rows[i]; 207737f463aSAlp Dener col = tao->res_weights_cols[i]; 208125ddc8aSJason Sarich 209125ddc8aSJason Sarich /* add wij*cj*Hi */ 210737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 211792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione)); 212125ddc8aSJason Sarich 213125ddc8aSJason Sarich /* add wij*ci*Hj */ 214737f463aSAlp Dener factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 215792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione)); 216125ddc8aSJason Sarich } 217125ddc8aSJason Sarich } 218125ddc8aSJason Sarich } else { 219ce902467SBarry Smith /* Default: Gres= sum_i[cigi] = G*c' */ 2209566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Identity weights\n")); 221792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione)); 222ce902467SBarry Smith 223ce902467SBarry Smith /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 2246c95174dSJason Sarich /* Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)} */ 225792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn)); 226125ddc8aSJason Sarich 227125ddc8aSJason Sarich /* sum(F(xkin,i)*H(:,:,i)) */ 228125ddc8aSJason Sarich if (tao->niter > 1) { 229125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 2306c95174dSJason Sarich factor = mfqP->C[i]; 231792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 232125ddc8aSJason Sarich } 233125ddc8aSJason Sarich } 234125ddc8aSJason Sarich } 2353ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 236125ddc8aSJason Sarich } 237ca88a2e0STodd Munson 238d71ae5a4SJacob Faibussowitsch static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi) 239d71ae5a4SJacob Faibussowitsch { 240a7e14dcfSSatish Balay /* Phi = .5*[x(1)^2 sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */ 241a7e14dcfSSatish Balay PetscInt i, j, k; 242a7e14dcfSSatish Balay PetscReal sqrt2 = PetscSqrtReal(2.0); 243f06e3bfaSBarry Smith 244a7e14dcfSSatish Balay PetscFunctionBegin; 245a7e14dcfSSatish Balay j = 0; 246a7e14dcfSSatish Balay for (i = 0; i < n; i++) { 247a7e14dcfSSatish Balay phi[j] = 0.5 * x[i] * x[i]; 248a7e14dcfSSatish Balay j++; 249a7e14dcfSSatish Balay for (k = i + 1; k < n; k++) { 250a7e14dcfSSatish Balay phi[j] = x[i] * x[k] / sqrt2; 251a7e14dcfSSatish Balay j++; 252a7e14dcfSSatish Balay } 253a7e14dcfSSatish Balay } 2543ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 255a7e14dcfSSatish Balay } 256a7e14dcfSSatish Balay 257d71ae5a4SJacob Faibussowitsch static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP) 258d71ae5a4SJacob Faibussowitsch { 259a7e14dcfSSatish Balay /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x' 260a7e14dcfSSatish Balay that satisfies the interpolation conditions Q(X[:,j]) = f(j) 261a7e14dcfSSatish Balay for j=1,...,m and with a Hessian matrix of least Frobenius norm */ 262a7e14dcfSSatish Balay 263a7e14dcfSSatish Balay /* NB --we are ignoring c */ 264a7e14dcfSSatish Balay PetscInt i, j, k, num, np = mfqP->nmodelpoints; 265a7e14dcfSSatish Balay PetscReal one = 1.0, zero = 0.0, negone = -1.0; 2666497c311SBarry Smith PetscBLASInt blasnpmax, blasnplus1, blasnp, blasint, blasint2; 267f06e3bfaSBarry Smith PetscBLASInt info, ione = 1; 268a7e14dcfSSatish Balay PetscReal sqrt2 = PetscSqrtReal(2.0); 269a7e14dcfSSatish Balay 270a7e14dcfSSatish Balay PetscFunctionBegin; 2716497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax)); 2726497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->n + 1, &blasnplus1)); 2736497c311SBarry Smith PetscCall(PetscBLASIntCast(np, &blasnp)); 2746497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->n * (mfqP->n + 1) / 2, &blasint)); 2756497c311SBarry Smith PetscCall(PetscBLASIntCast(np - mfqP->n - 1, &blasint2)); 276ad540459SPierre Jolivet for (i = 0; i < mfqP->n * mfqP->m; i++) mfqP->Gdel[i] = 0; 277ad540459SPierre Jolivet for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) mfqP->Hdel[i] = 0; 278a7e14dcfSSatish Balay 279a7e14dcfSSatish Balay /* factor M */ 280792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info)); 28163a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrf returned with value %" PetscBLASInt_FMT, info); 282a7e14dcfSSatish Balay 283a7e14dcfSSatish Balay if (np == mfqP->n + 1) { 284ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) mfqP->omega[i] = 0.0; 285ad540459SPierre Jolivet for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->beta[i] = 0.0; 286a7e14dcfSSatish Balay } else { 287a7e14dcfSSatish Balay /* Let Ltmp = (L'*L) */ 288792fecdfSBarry Smith PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint)); 289a7e14dcfSSatish Balay 290a7e14dcfSSatish Balay /* factor Ltmp */ 291792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info)); 29263a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrf returned with value %" PetscBLASInt_FMT, info); 293a7e14dcfSSatish Balay } 294a7e14dcfSSatish Balay 295a7e14dcfSSatish Balay for (k = 0; k < mfqP->m; k++) { 296a7e14dcfSSatish Balay if (np != mfqP->n + 1) { 297a7e14dcfSSatish Balay /* Solve L'*L*Omega = Z' * RESk*/ 298792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione)); 299792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info)); 30063a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrs returned with value %" PetscBLASInt_FMT, info); 301a7e14dcfSSatish Balay 302a7e14dcfSSatish Balay /* Beta = L*Omega */ 303792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione)); 304a7e14dcfSSatish Balay } 305a7e14dcfSSatish Balay 306a7e14dcfSSatish Balay /* solve M'*Alpha = RESk - N'*Beta */ 307792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione)); 308792fecdfSBarry Smith PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info)); 30963a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrs returned with value %" PetscBLASInt_FMT, info); 310a7e14dcfSSatish Balay 311a7e14dcfSSatish Balay /* Gdel(:,k) = Alpha(2:n+1) */ 312ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1]; 313a7e14dcfSSatish Balay 314a7e14dcfSSatish Balay /* Set Hdels */ 315a7e14dcfSSatish Balay num = 0; 316a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 317a7e14dcfSSatish Balay /* H[i,i,k] = Beta(num) */ 318a7e14dcfSSatish Balay mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num]; 319a7e14dcfSSatish Balay num++; 320a7e14dcfSSatish Balay for (j = i + 1; j < mfqP->n; j++) { 321a7e14dcfSSatish Balay /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */ 322a7e14dcfSSatish Balay mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 323a7e14dcfSSatish Balay mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 324a7e14dcfSSatish Balay num++; 325a7e14dcfSSatish Balay } 326a7e14dcfSSatish Balay } 327a7e14dcfSSatish Balay } 3283ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 329a7e14dcfSSatish Balay } 330a7e14dcfSSatish Balay 331d71ae5a4SJacob Faibussowitsch static PetscErrorCode morepoints(TAO_POUNDERS *mfqP) 332d71ae5a4SJacob Faibussowitsch { 333a7e14dcfSSatish Balay /* Assumes mfqP->model_indices[0] is minimum index 334a7e14dcfSSatish Balay Finishes adding points to mfqP->model_indices (up to npmax) 335a7e14dcfSSatish Balay Computes L,Z,M,N 336a7e14dcfSSatish Balay np is actual number of points in model (should equal npmax?) */ 337a7e14dcfSSatish Balay PetscInt point, i, j, offset; 338a7e14dcfSSatish Balay PetscInt reject; 3396497c311SBarry Smith PetscBLASInt blasn, blasnpmax, blasnplus1, info, blasnmax, blasint, blasint2, blasnp, blasmaxmn; 3405e081366SBarry Smith const PetscReal *x; 3415e081366SBarry Smith PetscReal normd; 342a7e14dcfSSatish Balay 343f06e3bfaSBarry Smith PetscFunctionBegin; 3446497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax)); 3456497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->n, &blasn)); 3466497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->nmax, &blasnmax)); 3476497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->n + 1, &blasnplus1)); 3486497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->n, &blasnp)); 349a7e14dcfSSatish Balay /* Initialize M,N */ 350a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 3519566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 352a7e14dcfSSatish Balay mfqP->M[(mfqP->n + 1) * i] = 1.0; 353ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 3549566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 3559566063dSJacob Faibussowitsch PetscCall(phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i])); 356a7e14dcfSSatish Balay } 357a7e14dcfSSatish Balay 358a7e14dcfSSatish Balay /* Now we add points until we have npmax starting with the most recent ones */ 359a7e14dcfSSatish Balay point = mfqP->nHist - 1; 360a7e14dcfSSatish Balay mfqP->nmodelpoints = mfqP->n + 1; 361a7e14dcfSSatish Balay while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) { 362a7e14dcfSSatish Balay /* Reject any points already in the model */ 363a7e14dcfSSatish Balay reject = 0; 364a7e14dcfSSatish Balay for (j = 0; j < mfqP->n + 1; j++) { 365a7e14dcfSSatish Balay if (point == mfqP->model_indices[j]) { 366a7e14dcfSSatish Balay reject = 1; 367a7e14dcfSSatish Balay break; 368a7e14dcfSSatish Balay } 369a7e14dcfSSatish Balay } 370a7e14dcfSSatish Balay 371a7e14dcfSSatish Balay /* Reject if norm(d) >c2 */ 372a7e14dcfSSatish Balay if (!reject) { 3739566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[point], mfqP->workxvec)); 3749566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex])); 3759566063dSJacob Faibussowitsch PetscCall(VecNorm(mfqP->workxvec, NORM_2, &normd)); 376a7e14dcfSSatish Balay normd /= mfqP->delta; 377ad540459SPierre Jolivet if (normd > mfqP->c2) reject = 1; 378a7e14dcfSSatish Balay } 379f06e3bfaSBarry Smith if (reject) { 380a7e14dcfSSatish Balay point--; 381a7e14dcfSSatish Balay continue; 382a7e14dcfSSatish Balay } 383a7e14dcfSSatish Balay 3849566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[point], &x)); 385a7e14dcfSSatish Balay mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0; 386ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 3879566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[point], &x)); 3889566063dSJacob Faibussowitsch PetscCall(phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)])); 389a7e14dcfSSatish Balay 390a7e14dcfSSatish Balay /* Update QR factorization */ 391a7e14dcfSSatish Balay /* Copy M' to Q_tmp */ 392a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 393ad540459SPierre Jolivet for (j = 0; j < mfqP->npmax; j++) mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j]; 394a7e14dcfSSatish Balay } 3956497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->nmodelpoints + 1, &blasnp)); 396a7e14dcfSSatish Balay /* Q_tmp,R = qr(M') */ 3976497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(mfqP->m, mfqP->n + 1), &blasmaxmn)); 398792fecdfSBarry Smith PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info)); 39963a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine geqrf returned with value %" PetscBLASInt_FMT, info); 400a7e14dcfSSatish Balay 401a7e14dcfSSatish Balay /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */ 402a7e14dcfSSatish Balay /* L = N*Qtmp */ 4036497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->n * (mfqP->n + 1) / 2, &blasint2)); 404a7e14dcfSSatish Balay /* Copy N to L_tmp */ 405ad540459SPierre Jolivet for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) mfqP->L_tmp[i] = mfqP->N[i]; 406a7e14dcfSSatish Balay /* Copy L_save to L_tmp */ 407a7e14dcfSSatish Balay 408a7e14dcfSSatish Balay /* L_tmp = N*Qtmp' */ 409792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info)); 41063a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 411a7e14dcfSSatish Balay 412a7e14dcfSSatish Balay /* Copy L_tmp to L_save */ 413ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L_save[i] = mfqP->L_tmp[i]; 414a7e14dcfSSatish Balay 415a7e14dcfSSatish Balay /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */ 4166497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->nmodelpoints - mfqP->n, &blasint)); 4179566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 418792fecdfSBarry Smith PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info)); 4199566063dSJacob Faibussowitsch PetscCall(PetscFPTrapPop()); 42063a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine gesvd returned with value %" PetscBLASInt_FMT, info); 421a7e14dcfSSatish Balay 422a7e14dcfSSatish Balay if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) { 423a7e14dcfSSatish Balay /* accept point */ 424a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = point; 425a7e14dcfSSatish Balay /* Copy Q_tmp to Q */ 426ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q[i] = mfqP->Q_tmp[i]; 427ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax; i++) mfqP->tau[i] = mfqP->tau_tmp[i]; 428a7e14dcfSSatish Balay mfqP->nmodelpoints++; 4296497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blasnp)); 430a7e14dcfSSatish Balay 431a7e14dcfSSatish Balay /* Copy L_save to L */ 432ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = mfqP->L_save[i]; 433a7e14dcfSSatish Balay } 434a7e14dcfSSatish Balay point--; 435a7e14dcfSSatish Balay } 436a7e14dcfSSatish Balay 4376497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blasnp)); 438a7e14dcfSSatish Balay /* Copy Q(:,n+2:np) to Z */ 439a7e14dcfSSatish Balay /* First set Q_tmp to I */ 440ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q_tmp[i] = 0.0; 441ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax; i++) mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0; 442a7e14dcfSSatish Balay 443a7e14dcfSSatish Balay /* Q_tmp = I * Q */ 444792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 44563a3b9bcSJacob Faibussowitsch PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 446a7e14dcfSSatish Balay 447a7e14dcfSSatish Balay /* Copy Q_tmp(:,n+2:np) to Z) */ 448a7e14dcfSSatish Balay offset = mfqP->npmax * (mfqP->n + 1); 449ad540459SPierre Jolivet for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Z[i - offset] = mfqP->Q_tmp[i]; 450a7e14dcfSSatish Balay 451a7e14dcfSSatish Balay if (mfqP->nmodelpoints == mfqP->n + 1) { 452a7e14dcfSSatish Balay /* Set L to I_{n+1} */ 453ad540459SPierre Jolivet for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = 0.0; 454ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0; 455a7e14dcfSSatish Balay } 4563ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 457a7e14dcfSSatish Balay } 458a7e14dcfSSatish Balay 459a7e14dcfSSatish Balay /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */ 460d71ae5a4SJacob Faibussowitsch static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index) 461d71ae5a4SJacob Faibussowitsch { 462a7e14dcfSSatish Balay PetscFunctionBegin; 463a7e14dcfSSatish Balay /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/ 4649566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist])); 4659566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES)); 4669566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 4679566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 4689566063dSJacob Faibussowitsch PetscCall(VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex])); 469a7e14dcfSSatish Balay 470a7e14dcfSSatish Balay /* Project into feasible region */ 47148a46eb9SPierre Jolivet if (tao->XU && tao->XL) PetscCall(VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist])); 472a7e14dcfSSatish Balay 473a7e14dcfSSatish Balay /* Compute value of new vector */ 4749566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist])); 475a7e14dcfSSatish Balay CHKMEMQ; 4769566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 477a7e14dcfSSatish Balay 478a7e14dcfSSatish Balay /* Add new vector to model */ 479a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist; 480a7e14dcfSSatish Balay mfqP->nmodelpoints++; 481a7e14dcfSSatish Balay mfqP->nHist++; 4823ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 483a7e14dcfSSatish Balay } 484a7e14dcfSSatish Balay 485d71ae5a4SJacob Faibussowitsch static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints) 486d71ae5a4SJacob Faibussowitsch { 487a7e14dcfSSatish Balay /* modeld = Q(:,np+1:n)' */ 488a7e14dcfSSatish Balay PetscInt i, j, minindex = 0; 489e270355aSBarry Smith PetscReal dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY; 4906497c311SBarry Smith PetscBLASInt blasn, blasnpmax, blask, info; 4916497c311SBarry Smith PetscBLASInt blas1 = 1, blasnmax; 492f06e3bfaSBarry Smith 493362febeeSStefano Zampini PetscFunctionBegin; 4946497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->n, &blasn)); 4956497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax)); 4966497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask)); 4976497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->nmax, &blasnmax)); 4986497c311SBarry Smith 499a7e14dcfSSatish Balay /* Qtmp = I(n x n) */ 500a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 501ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0; 502a7e14dcfSSatish Balay } 503ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0; 504a7e14dcfSSatish Balay 505a7e14dcfSSatish Balay /* Qtmp = Q * I */ 506792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 507a7e14dcfSSatish Balay 508a7e14dcfSSatish Balay for (i = mfqP->nmodelpoints; i < mfqP->n; i++) { 509792fecdfSBarry Smith PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1)); 510a7e14dcfSSatish Balay if (dp > 0.0) { /* Model says use the other direction! */ 511ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i * mfqP->npmax + j] *= -1; 512a7e14dcfSSatish Balay } 513a7e14dcfSSatish Balay /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */ 514ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->work2[j] = mfqP->Gres[j]; 515792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1)); 516792fecdfSBarry Smith PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1)); 517a7e14dcfSSatish Balay if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) { 518a7e14dcfSSatish Balay minindex = i; 519a7e14dcfSSatish Balay minvalue = mfqP->work[i]; 520a7e14dcfSSatish Balay } 52148a46eb9SPierre Jolivet if (addallpoints != 0) PetscCall(addpoint(tao, mfqP, i)); 522a7e14dcfSSatish Balay } 52348a46eb9SPierre Jolivet if (!addallpoints) PetscCall(addpoint(tao, mfqP, minindex)); 5243ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 525a7e14dcfSSatish Balay } 526a7e14dcfSSatish Balay 527d71ae5a4SJacob Faibussowitsch static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c) 528d71ae5a4SJacob Faibussowitsch { 529a7e14dcfSSatish Balay PetscInt i, j; 5306497c311SBarry Smith PetscBLASInt blasm, blasj, blask, blasn, ione = 1, info; 5316497c311SBarry Smith PetscBLASInt blasnpmax, blasmaxmn; 532a7e14dcfSSatish Balay PetscReal proj, normd; 5335e081366SBarry Smith const PetscReal *x; 534a7e14dcfSSatish Balay 535f06e3bfaSBarry Smith PetscFunctionBegin; 5366497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax)); 5376497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->m, &blasm)); 5386497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->n, &blasn)); 539a7e14dcfSSatish Balay for (i = mfqP->nHist - 1; i >= 0; i--) { 5409566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[i], &x)); 541ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta; 5429566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[i], &x)); 543792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione)); 544792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione)); 545a8bd9cb3STodd Munson if (normd <= c) { 54657508eceSPierre Jolivet PetscCall(PetscBLASIntCast(PetscMax(mfqP->n - mfqP->nmodelpoints, 0), &blasj)); 547a7e14dcfSSatish Balay if (!mfqP->q_is_I) { 548a7e14dcfSSatish Balay /* project D onto null */ 5496497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask)); 550792fecdfSBarry Smith PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info)); 55163a3b9bcSJacob Faibussowitsch PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "ormqr returned value %" PetscBLASInt_FMT, info); 552a7e14dcfSSatish Balay } 553792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione)); 554a7e14dcfSSatish Balay 555a7e14dcfSSatish Balay if (proj >= mfqP->theta1) { /* add this index to model */ 556a7e14dcfSSatish Balay mfqP->model_indices[mfqP->nmodelpoints] = i; 557a7e14dcfSSatish Balay mfqP->nmodelpoints++; 558792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione)); 5596497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->npmax * (mfqP->nmodelpoints), &blask)); 560792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione)); 5616497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask)); 5626497c311SBarry Smith PetscCall(PetscBLASIntCast(PetscMax(mfqP->m, mfqP->n), &blasmaxmn)); 563792fecdfSBarry Smith PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info)); 56463a3b9bcSJacob Faibussowitsch PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "geqrf returned value %" PetscBLASInt_FMT, info); 565a7e14dcfSSatish Balay mfqP->q_is_I = 0; 566a7e14dcfSSatish Balay } 567ad540459SPierre Jolivet if (mfqP->nmodelpoints == mfqP->n) break; 568a7e14dcfSSatish Balay } 569a7e14dcfSSatish Balay } 5703ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 571a7e14dcfSSatish Balay } 572f06e3bfaSBarry Smith 573d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoSolve_POUNDERS(Tao tao) 574d71ae5a4SJacob Faibussowitsch { 575a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 5768931d482SJason Sarich PetscInt i, ii, j, k, l; 577a7e14dcfSSatish Balay PetscReal step = 1.0; 578a7e14dcfSSatish Balay PetscInt low, high; 579a7e14dcfSSatish Balay PetscReal minnorm; 5805e081366SBarry Smith PetscReal *x, *f; 5815e081366SBarry Smith const PetscReal *xmint, *fmin; 582bd026e97SJed Brown PetscReal deltaold; 583a7e14dcfSSatish Balay PetscReal gnorm; 584a7e14dcfSSatish Balay PetscBLASInt info, ione = 1, iblas; 585ff2b74efSJason Sarich PetscBool valid, same; 586a7e14dcfSSatish Balay PetscReal mdec, rho, normxsp; 587a7e14dcfSSatish Balay PetscReal one = 1.0, zero = 0.0, ratio; 588125ddc8aSJason Sarich PetscBLASInt blasm, blasn, blasncopy, blasnpmax; 589a7bbbda8SBarry Smith static PetscBool set = PETSC_FALSE; 590a7e14dcfSSatish Balay 591a7e14dcfSSatish Balay /* n = # of parameters 592a7e14dcfSSatish Balay m = dimension (components) of function */ 593a7e14dcfSSatish Balay PetscFunctionBegin; 5949566063dSJacob Faibussowitsch PetscCall(PetscCitationsRegister("@article{UNEDF0,\n" 595a7bbbda8SBarry Smith "title = {Nuclear energy density optimization},\n" 596a7bbbda8SBarry Smith "author = {Kortelainen, M. and Lesinski, T. and Mor\'e, J. and Nazarewicz, W.\n" 597a7bbbda8SBarry Smith " and Sarich, J. and Schunck, N. and Stoitsov, M. V. and Wild, S. },\n" 598a7bbbda8SBarry Smith "journal = {Phys. Rev. C},\n" 599a7bbbda8SBarry Smith "volume = {82},\n" 600a7bbbda8SBarry Smith "number = {2},\n" 601a7bbbda8SBarry Smith "pages = {024313},\n" 602a7bbbda8SBarry Smith "numpages = {18},\n" 603a7bbbda8SBarry Smith "year = {2010},\n" 604a7bbbda8SBarry Smith "month = {Aug},\n" 6059371c9d4SSatish Balay "doi = {10.1103/PhysRevC.82.024313}\n}\n", 6069371c9d4SSatish Balay &set)); 607e6d4cb7fSJason Sarich tao->niter = 0; 608a7e14dcfSSatish Balay if (tao->XL && tao->XU) { 609a7e14dcfSSatish Balay /* Check x0 <= XU */ 610a7e14dcfSSatish Balay PetscReal val; 611ce902467SBarry Smith 6129566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 6139566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 6149566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 6153c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 > upper bound"); 616a7e14dcfSSatish Balay 617a7e14dcfSSatish Balay /* Check x0 >= xl */ 6189566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->XL, mfqP->Xhist[0])); 6199566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->solution)); 6209566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 6213c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 < lower bound"); 622f06e3bfaSBarry Smith 623a7e14dcfSSatish Balay /* Check x0 + delta < XU -- should be able to get around this eventually */ 624a7e14dcfSSatish Balay 6259566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->Xhist[0], mfqP->delta)); 6269566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], 1.0, tao->solution)); 6279566063dSJacob Faibussowitsch PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 6289566063dSJacob Faibussowitsch PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 6293c859ba3SBarry Smith PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 + delta > upper bound"); 630a7e14dcfSSatish Balay } 631a7e14dcfSSatish Balay 6326497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->m, &blasm)); 6336497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->n, &blasn)); 6346497c311SBarry Smith PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax)); 635ce902467SBarry Smith for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0; 636a7e14dcfSSatish Balay 6379566063dSJacob Faibussowitsch PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 638ce902467SBarry Smith 639ce902467SBarry Smith /* This provides enough information to approximate the gradient of the objective */ 640ce902467SBarry Smith /* using a forward difference scheme. */ 641ce902467SBarry Smith 6429566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta)); 6439566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0])); 644a7e14dcfSSatish Balay mfqP->minindex = 0; 645a7e14dcfSSatish Balay minnorm = mfqP->Fres[0]; 646ff2b74efSJason Sarich 6479566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(mfqP->Xhist[0], &low, &high)); 648805332fbSTodd Munson for (i = 1; i < mfqP->n + 1; ++i) { 6499566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[0], mfqP->Xhist[i])); 650ce902467SBarry Smith 651a7e14dcfSSatish Balay if (i - 1 >= low && i - 1 < high) { 6529566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 653a7e14dcfSSatish Balay x[i - 1 - low] += mfqP->delta; 6549566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 655a7e14dcfSSatish Balay } 656a7e14dcfSSatish Balay CHKMEMQ; 6579566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i])); 658a7e14dcfSSatish Balay if (mfqP->Fres[i] < minnorm) { 659a7e14dcfSSatish Balay mfqP->minindex = i; 660a7e14dcfSSatish Balay minnorm = mfqP->Fres[i]; 661a7e14dcfSSatish Balay } 662a7e14dcfSSatish Balay } 6639566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 6649566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 6659566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm)); 666ce902467SBarry Smith 667a7e14dcfSSatish Balay /* Gather mpi vecs to one big local vec */ 668a7e14dcfSSatish Balay 669a7e14dcfSSatish Balay /* Begin serial code */ 670a7e14dcfSSatish Balay 671a7e14dcfSSatish Balay /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 672a7e14dcfSSatish Balay /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 673a7e14dcfSSatish Balay /* (Column oriented for blas calls) */ 674a7e14dcfSSatish Balay ii = 0; 675a7e14dcfSSatish Balay 676835f2295SStefano Zampini PetscCall(PetscInfo(tao, "Build matrix: %d\n", mfqP->size)); 677805332fbSTodd Munson if (1 == mfqP->size) { 6789566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 679a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 6809566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 6819566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 682a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 683a7e14dcfSSatish Balay if (i == mfqP->minindex) continue; 684a7e14dcfSSatish Balay 6859566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 686ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 6879566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 688a7e14dcfSSatish Balay 6899566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Fhist[i], &f)); 690ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; 6919566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Fhist[i], &f)); 69294e8dccaSTodd Munson 693a7e14dcfSSatish Balay mfqP->model_indices[ii++] = i; 694a7e14dcfSSatish Balay } 695ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j]; 6969566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 697a7e14dcfSSatish Balay } else { 6989566063dSJacob Faibussowitsch PetscCall(VecSet(mfqP->localxmin, 0)); 6999566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 7009566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 701ce902467SBarry Smith 7029566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->localxmin, &xmint)); 703a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 7049566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->localxmin, &xmint)); 705a7e14dcfSSatish Balay 7069566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 7079566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 7089566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->localfmin, &fmin)); 709a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 710a7e14dcfSSatish Balay if (i == mfqP->minindex) continue; 711a7e14dcfSSatish Balay 7129566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 7139566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 7149566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->localx, &x)); 715ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 7169566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->localx, &x)); 717a7e14dcfSSatish Balay 7189566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 7199566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 7209566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->localf, &f)); 721ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; 7229566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->localf, &f)); 72394e8dccaSTodd Munson 72494e8dccaSTodd Munson mfqP->model_indices[ii++] = i; 725a7e14dcfSSatish Balay } 726ad540459SPierre Jolivet for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j]; 7279566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->localfmin, &fmin)); 728a7e14dcfSSatish Balay } 729a7e14dcfSSatish Balay 730a7e14dcfSSatish Balay /* Determine the initial quadratic models */ 731a7e14dcfSSatish Balay /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */ 732a7e14dcfSSatish Balay /* D (nxn) Fdiff (nxm) => G (nxm) */ 733125ddc8aSJason Sarich blasncopy = blasn; 734792fecdfSBarry Smith PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info)); 735835f2295SStefano Zampini PetscCall(PetscInfo(tao, "Linear solve return: %" PetscBLASInt_FMT "\n", info)); 736a7e14dcfSSatish Balay 7379566063dSJacob Faibussowitsch PetscCall(pounders_update_res(tao)); 738a7e14dcfSSatish Balay 739a7e14dcfSSatish Balay valid = PETSC_TRUE; 740a7e14dcfSSatish Balay 7419566063dSJacob Faibussowitsch PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 7429566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(tao->gradient)); 7439566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(tao->gradient)); 7449566063dSJacob Faibussowitsch PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 745a7e14dcfSSatish Balay gnorm *= mfqP->delta; 7469566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 747763847b4SAlp Dener 748763847b4SAlp Dener tao->reason = TAO_CONTINUE_ITERATING; 7499566063dSJacob Faibussowitsch PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 7509566063dSJacob Faibussowitsch PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 751dbbe0bcdSBarry Smith PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 752763847b4SAlp Dener 753a7e14dcfSSatish Balay mfqP->nHist = mfqP->n + 1; 754a7e14dcfSSatish Balay mfqP->nmodelpoints = mfqP->n + 1; 7559566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm)); 756a7e14dcfSSatish Balay 757763847b4SAlp Dener while (tao->reason == TAO_CONTINUE_ITERATING) { 758a7e14dcfSSatish Balay PetscReal gnm = 1e-4; 759e1e80dc8SAlp Dener /* Call general purpose update function */ 760dbbe0bcdSBarry Smith PetscTryTypeMethod(tao, update, tao->niter, tao->user_update); 7618931d482SJason Sarich tao->niter++; 762ce902467SBarry Smith /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */ 7639566063dSJacob Faibussowitsch PetscCall(gqtwrap(tao, &gnm, &mdec)); 764a7e14dcfSSatish Balay /* Evaluate the function at the new point */ 765a7e14dcfSSatish Balay 766ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i]; 7679566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist])); 7689566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist])); 7699566063dSJacob Faibussowitsch PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES)); 7709566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 7719566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 772a7e14dcfSSatish Balay 7739566063dSJacob Faibussowitsch PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 774125ddc8aSJason Sarich 775a7e14dcfSSatish Balay rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec; 776a7e14dcfSSatish Balay mfqP->nHist++; 777a7e14dcfSSatish Balay 778a7e14dcfSSatish Balay /* Update the center */ 779a7e14dcfSSatish Balay if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) { 780a7e14dcfSSatish Balay /* Update model to reflect new base point */ 781ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta; 782a7e14dcfSSatish Balay for (j = 0; j < mfqP->m; j++) { 783a7e14dcfSSatish Balay /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work'; 784a7e14dcfSSatish Balay G(:,j) = G(:,j) + H(:,:,j)*work' */ 785a7e14dcfSSatish Balay for (k = 0; k < mfqP->n; k++) { 786a7e14dcfSSatish Balay mfqP->work2[k] = 0.0; 787ad540459SPierre Jolivet for (l = 0; l < mfqP->n; l++) mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l]; 788a7e14dcfSSatish Balay } 789a7e14dcfSSatish Balay for (i = 0; i < mfqP->n; i++) { 790a7e14dcfSSatish Balay mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]); 791a7e14dcfSSatish Balay mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i]; 792a7e14dcfSSatish Balay } 793a7e14dcfSSatish Balay } 794a7e14dcfSSatish Balay /* Cres += work*Gres + .5*work*Hres*work'; 795a7e14dcfSSatish Balay Gres += Hres*work'; */ 796a7e14dcfSSatish Balay 797792fecdfSBarry Smith PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione)); 798ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] += mfqP->work2[i]; 799a7e14dcfSSatish Balay mfqP->minindex = mfqP->nHist - 1; 800a7e14dcfSSatish Balay minnorm = mfqP->Fres[mfqP->minindex]; 8019566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 802a7e14dcfSSatish Balay /* Change current center */ 8039566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 804ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 8059566063dSJacob Faibussowitsch PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 806a7e14dcfSSatish Balay } 807a7e14dcfSSatish Balay 808a7e14dcfSSatish Balay /* Evaluate at a model-improving point if necessary */ 809a7e14dcfSSatish Balay if (valid == PETSC_FALSE) { 810a7e14dcfSSatish Balay mfqP->q_is_I = 1; 8118a35fac9SJason Sarich mfqP->nmodelpoints = 0; 8129566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 813a7e14dcfSSatish Balay if (mfqP->nmodelpoints < mfqP->n) { 8149566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Model not valid -- model-improving\n")); 8159566063dSJacob Faibussowitsch PetscCall(modelimprove(tao, mfqP, 1)); 816a7e14dcfSSatish Balay } 817a7e14dcfSSatish Balay } 818a7e14dcfSSatish Balay 819a7e14dcfSSatish Balay /* Update the trust region radius */ 820a7e14dcfSSatish Balay deltaold = mfqP->delta; 821a7e14dcfSSatish Balay normxsp = 0; 822ad540459SPierre Jolivet for (i = 0; i < mfqP->n; i++) normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i]; 823a7e14dcfSSatish Balay normxsp = PetscSqrtReal(normxsp); 824a7e14dcfSSatish Balay if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) { 825a7e14dcfSSatish Balay mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax); 826a7e14dcfSSatish Balay } else if (valid == PETSC_TRUE) { 827a7e14dcfSSatish Balay mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin); 828a7e14dcfSSatish Balay } 829a7e14dcfSSatish Balay 830a7e14dcfSSatish Balay /* Compute the next interpolation set */ 831a7e14dcfSSatish Balay mfqP->q_is_I = 1; 832a7e14dcfSSatish Balay mfqP->nmodelpoints = 0; 8339566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1)); 8349566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 835a7e14dcfSSatish Balay if (mfqP->nmodelpoints == mfqP->n) { 836a7e14dcfSSatish Balay valid = PETSC_TRUE; 837a7e14dcfSSatish Balay } else { 838a7e14dcfSSatish Balay valid = PETSC_FALSE; 8399566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2)); 8409566063dSJacob Faibussowitsch PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c2)); 841a7e14dcfSSatish Balay if (mfqP->n > mfqP->nmodelpoints) { 8429566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Model not valid -- adding geometry points\n")); 8439566063dSJacob Faibussowitsch PetscCall(modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints)); 844a7e14dcfSSatish Balay } 845a7e14dcfSSatish Balay } 846ad540459SPierre Jolivet for (i = mfqP->nmodelpoints; i > 0; i--) mfqP->model_indices[i] = mfqP->model_indices[i - 1]; 847a7e14dcfSSatish Balay mfqP->nmodelpoints++; 848a7e14dcfSSatish Balay mfqP->model_indices[0] = mfqP->minindex; 8499566063dSJacob Faibussowitsch PetscCall(morepoints(mfqP)); 850a7e14dcfSSatish Balay for (i = 0; i < mfqP->nmodelpoints; i++) { 8519566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 852ad540459SPierre Jolivet for (j = 0; j < mfqP->n; j++) mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold; 8539566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 8549566063dSJacob Faibussowitsch PetscCall(VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 855a7e14dcfSSatish Balay for (j = 0; j < mfqP->m; j++) { 856a7e14dcfSSatish Balay for (k = 0; k < mfqP->n; k++) { 857a7e14dcfSSatish Balay mfqP->work[k] = 0.0; 858ad540459SPierre Jolivet for (l = 0; l < mfqP->n; l++) mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l]; 859a7e14dcfSSatish Balay } 860792fecdfSBarry Smith PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]); 861a7e14dcfSSatish Balay } 8629566063dSJacob Faibussowitsch PetscCall(VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 863a7e14dcfSSatish Balay } 864a7e14dcfSSatish Balay 865a7e14dcfSSatish Balay /* Update the quadratic model */ 86663a3b9bcSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints)); 8679566063dSJacob Faibussowitsch PetscCall(getquadpounders(mfqP)); 8689566063dSJacob Faibussowitsch PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 869792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione)); 870a7e14dcfSSatish Balay /* G = G*(delta/deltaold) + Gdel */ 871a7e14dcfSSatish Balay ratio = mfqP->delta / deltaold; 872a7e14dcfSSatish Balay iblas = blasm * blasn; 873792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione)); 874792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione)); 875125ddc8aSJason Sarich /* H = H*(delta/deltaold)^2 + Hdel */ 876a7e14dcfSSatish Balay iblas = blasm * blasn * blasn; 877a7e14dcfSSatish Balay ratio *= ratio; 878792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione)); 879792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione)); 880a7e14dcfSSatish Balay 881a7e14dcfSSatish Balay /* Get residuals */ 8829566063dSJacob Faibussowitsch PetscCall(pounders_update_res(tao)); 883a7e14dcfSSatish Balay 884a7e14dcfSSatish Balay /* Export solution and gradient residual to TAO */ 8859566063dSJacob Faibussowitsch PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 8869566063dSJacob Faibussowitsch PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 8879566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(tao->gradient)); 8889566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(tao->gradient)); 8899566063dSJacob Faibussowitsch PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 890a7e14dcfSSatish Balay gnorm *= mfqP->delta; 891a7e14dcfSSatish Balay /* final criticality test */ 8929566063dSJacob Faibussowitsch PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 8939566063dSJacob Faibussowitsch PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 894dbbe0bcdSBarry Smith PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 895ff2b74efSJason Sarich /* test for repeated model */ 896c9376581SJason Sarich if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) { 897c9376581SJason Sarich same = PETSC_TRUE; 898c9376581SJason Sarich } else { 899c9376581SJason Sarich same = PETSC_FALSE; 900c9376581SJason Sarich } 901ff2b74efSJason Sarich for (i = 0; i < mfqP->nmodelpoints; i++) { 902c9376581SJason Sarich if (same) { 903c9376581SJason Sarich if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) { 904c9376581SJason Sarich same = PETSC_TRUE; 905c9376581SJason Sarich } else { 906c9376581SJason Sarich same = PETSC_FALSE; 907c9376581SJason Sarich } 908c9376581SJason Sarich } 909ff2b74efSJason Sarich mfqP->last_model_indices[i] = mfqP->model_indices[i]; 910ff2b74efSJason Sarich } 911ff2b74efSJason Sarich mfqP->last_nmodelpoints = mfqP->nmodelpoints; 912ff2b74efSJason Sarich if (same && mfqP->delta == deltaold) { 9139566063dSJacob Faibussowitsch PetscCall(PetscInfo(tao, "Identical model used in successive iterations\n")); 9145d72676cSJason Sarich tao->reason = TAO_CONVERGED_STEPTOL; 915ff2b74efSJason Sarich } 916a7e14dcfSSatish Balay } 9173ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 918a7e14dcfSSatish Balay } 919a7e14dcfSSatish Balay 920d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoSetUp_POUNDERS(Tao tao) 921d71ae5a4SJacob Faibussowitsch { 922a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 923125ddc8aSJason Sarich PetscInt i, j; 924a7e14dcfSSatish Balay IS isfloc, isfglob, isxloc, isxglob; 925a7e14dcfSSatish Balay 926a7e14dcfSSatish Balay PetscFunctionBegin; 9279566063dSJacob Faibussowitsch if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient)); 9289566063dSJacob Faibussowitsch if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection)); 9299566063dSJacob Faibussowitsch PetscCall(VecGetSize(tao->solution, &mfqP->n)); 9309566063dSJacob Faibussowitsch PetscCall(VecGetSize(tao->ls_res, &mfqP->m)); 931a7e14dcfSSatish Balay mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n); 932606f75f6SBarry Smith if (mfqP->npmax == PETSC_CURRENT) mfqP->npmax = 2 * mfqP->n + 1; 933a7e14dcfSSatish Balay mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax); 934a7e14dcfSSatish Balay mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2); 935a7e14dcfSSatish Balay 9369566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist)); 9379566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist)); 938a7e14dcfSSatish Balay for (i = 0; i < mfqP->n + 1; i++) { 9399566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[i])); 9409566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[i])); 941a7e14dcfSSatish Balay } 9429566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->solution, &mfqP->workxvec)); 9439566063dSJacob Faibussowitsch PetscCall(VecDuplicate(tao->ls_res, &mfqP->workfvec)); 944a7e14dcfSSatish Balay mfqP->nHist = 0; 945a7e14dcfSSatish Balay 9469566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres)); 9479566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES)); 9489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work)); 9499566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work2)); 9509566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->work3)); 9519566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork)); 9529566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega)); 9539566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta)); 9549566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n + 1, &mfqP->alpha)); 955a7e14dcfSSatish Balay 9569566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H)); 9579566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q)); 9589566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp)); 9599566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L)); 9609566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp)); 9619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save)); 9629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N)); 9639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M)); 9649566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z)); 9659566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau)); 9669566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp)); 967a7e14dcfSSatish Balay mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2); 9689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork)); 9699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork)); 9709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->xmin)); 9719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m, &mfqP->C)); 9729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff)); 9739566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp)); 9749566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->Gres)); 9759566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres)); 9769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints)); 9779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->model_indices)); 9789566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices)); 9799566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->Xsubproblem)); 9809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel)); 9819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel)); 9829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices)); 9839566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->n, &mfqP->iwork)); 9849566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w)); 985125ddc8aSJason Sarich for (i = 0; i < mfqP->m; i++) { 986125ddc8aSJason Sarich for (j = 0; j < mfqP->m; j++) { 987125ddc8aSJason Sarich if (i == j) { 988125ddc8aSJason Sarich mfqP->w[i + mfqP->m * j] = 1.0; 989125ddc8aSJason Sarich } else { 990125ddc8aSJason Sarich mfqP->w[i + mfqP->m * j] = 0.0; 991125ddc8aSJason Sarich } 992125ddc8aSJason Sarich } 993125ddc8aSJason Sarich } 994ad540459SPierre Jolivet for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) mfqP->indices[i] = i; 9959566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size)); 996f40bd260SBarry Smith if (mfqP->size > 1) { 9979566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx)); 9989566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin)); 9999566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf)); 10009566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin)); 10019566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc)); 10029566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob)); 10039566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc)); 10049566063dSJacob Faibussowitsch PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob)); 1005a7e14dcfSSatish Balay 10069566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx)); 10079566063dSJacob Faibussowitsch PetscCall(VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf)); 1008a7e14dcfSSatish Balay 10099566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isxloc)); 10109566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isxglob)); 10119566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isfloc)); 10129566063dSJacob Faibussowitsch PetscCall(ISDestroy(&isfglob)); 1013a7e14dcfSSatish Balay } 1014a7e14dcfSSatish Balay 1015a7e14dcfSSatish Balay if (!mfqP->usegqt) { 1016a7e14dcfSSatish Balay KSP ksp; 1017a7e14dcfSSatish Balay PC pc; 10189566063dSJacob Faibussowitsch PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx)); 10199566063dSJacob Faibussowitsch PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl)); 10209566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subb)); 10219566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subxu)); 10229566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subpdel)); 10239566063dSJacob Faibussowitsch PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subndel)); 10249566063dSJacob Faibussowitsch PetscCall(TaoCreate(PETSC_COMM_SELF, &mfqP->subtao)); 10259566063dSJacob Faibussowitsch PetscCall(PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1)); 10269566063dSJacob Faibussowitsch PetscCall(TaoSetType(mfqP->subtao, TAOBNTR)); 10279566063dSJacob Faibussowitsch PetscCall(TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_")); 10289566063dSJacob Faibussowitsch PetscCall(TaoSetSolution(mfqP->subtao, mfqP->subx)); 10299566063dSJacob Faibussowitsch PetscCall(TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP)); 10309566063dSJacob Faibussowitsch PetscCall(TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits)); 10319566063dSJacob Faibussowitsch PetscCall(TaoSetFromOptions(mfqP->subtao)); 10329566063dSJacob Faibussowitsch PetscCall(TaoGetKSP(mfqP->subtao, &ksp)); 1033a7e14dcfSSatish Balay if (ksp) { 10349566063dSJacob Faibussowitsch PetscCall(KSPGetPC(ksp, &pc)); 10359566063dSJacob Faibussowitsch PetscCall(PCSetType(pc, PCNONE)); 1036a7e14dcfSSatish Balay } 10379566063dSJacob Faibussowitsch PetscCall(TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu)); 10389566063dSJacob Faibussowitsch PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH)); 10399566063dSJacob Faibussowitsch PetscCall(TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP)); 1040a7e14dcfSSatish Balay } 10413ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1042a7e14dcfSSatish Balay } 1043a7e14dcfSSatish Balay 1044d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoDestroy_POUNDERS(Tao tao) 1045d71ae5a4SJacob Faibussowitsch { 1046a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1047a7e14dcfSSatish Balay PetscInt i; 1048a7e14dcfSSatish Balay 1049a7e14dcfSSatish Balay PetscFunctionBegin; 1050a7e14dcfSSatish Balay if (!mfqP->usegqt) { 10519566063dSJacob Faibussowitsch PetscCall(TaoDestroy(&mfqP->subtao)); 10529566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subx)); 10539566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subxl)); 10549566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subxu)); 10559566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subb)); 10569566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subpdel)); 10579566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->subndel)); 10589566063dSJacob Faibussowitsch PetscCall(MatDestroy(&mfqP->subH)); 1059a7e14dcfSSatish Balay } 10609566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fres)); 10619566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->RES)); 10629566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work)); 10639566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work2)); 10649566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->work3)); 10659566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->mwork)); 10669566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->omega)); 10679566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->beta)); 10689566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->alpha)); 10699566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->H)); 10709566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Q)); 10719566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Q_tmp)); 10729566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L)); 10739566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L_tmp)); 10749566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->L_save)); 10759566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->N)); 10769566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->M)); 10779566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Z)); 10789566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->tau)); 10799566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->tau_tmp)); 10809566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->npmaxwork)); 10819566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->npmaxiwork)); 10829566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->xmin)); 10839566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->C)); 10849566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fdiff)); 10859566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Disp)); 10869566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gres)); 10879566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Hres)); 10889566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gpoints)); 10899566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->model_indices)); 10909566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->last_model_indices)); 10919566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Xsubproblem)); 10929566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Gdel)); 10939566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Hdel)); 10949566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->indices)); 10959566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->iwork)); 10969566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->w)); 1097a7e14dcfSSatish Balay for (i = 0; i < mfqP->nHist; i++) { 10989566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->Xhist[i])); 10999566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->Fhist[i])); 1100a7e14dcfSSatish Balay } 11019566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->workxvec)); 11029566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->workfvec)); 11039566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Xhist)); 11049566063dSJacob Faibussowitsch PetscCall(PetscFree(mfqP->Fhist)); 1105a7e14dcfSSatish Balay 1106f40bd260SBarry Smith if (mfqP->size > 1) { 11079566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localx)); 11089566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localxmin)); 11099566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localf)); 11109566063dSJacob Faibussowitsch PetscCall(VecDestroy(&mfqP->localfmin)); 1111a7e14dcfSSatish Balay } 11129566063dSJacob Faibussowitsch PetscCall(PetscFree(tao->data)); 11133ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1114a7e14dcfSSatish Balay } 1115a7e14dcfSSatish Balay 1116ce78bad3SBarry Smith static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems PetscOptionsObject) 1117d71ae5a4SJacob Faibussowitsch { 1118a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1119a7e14dcfSSatish Balay 1120a7e14dcfSSatish Balay PetscFunctionBegin; 1121d0609cedSBarry Smith PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization"); 11229566063dSJacob Faibussowitsch PetscCall(PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL)); 1123ff2b74efSJason Sarich mfqP->delta = mfqP->delta0; 11249566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL)); 11259566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL)); 1126d0609cedSBarry Smith PetscOptionsHeadEnd(); 11273ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1128a7e14dcfSSatish Balay } 1129a7e14dcfSSatish Balay 1130d71ae5a4SJacob Faibussowitsch static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer) 1131d71ae5a4SJacob Faibussowitsch { 1132a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1133a7e14dcfSSatish Balay PetscBool isascii; 1134a7e14dcfSSatish Balay 1135a7e14dcfSSatish Balay PetscFunctionBegin; 11369566063dSJacob Faibussowitsch PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1137a7e14dcfSSatish Balay if (isascii) { 11389566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0)); 11399566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta)); 114063a3b9bcSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints)); 1141a7e14dcfSSatish Balay if (mfqP->usegqt) { 11429566063dSJacob Faibussowitsch PetscCall(PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n")); 1143a7e14dcfSSatish Balay } else { 11449566063dSJacob Faibussowitsch PetscCall(TaoView(mfqP->subtao, viewer)); 1145a7e14dcfSSatish Balay } 1146a7e14dcfSSatish Balay } 11473ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1148a7e14dcfSSatish Balay } 11491eb8069cSJason Sarich /*MC 11501eb8069cSJason Sarich TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares 11511eb8069cSJason Sarich 11521eb8069cSJason Sarich Options Database Keys: 11531eb8069cSJason Sarich + -tao_pounders_delta - initial step length 11541eb8069cSJason Sarich . -tao_pounders_npmax - maximum number of points in model 11551eb8069cSJason Sarich - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON 11561eb8069cSJason Sarich 11571eb8069cSJason Sarich Level: beginner 11581eb8069cSJason Sarich 11591eb8069cSJason Sarich M*/ 1160a7e14dcfSSatish Balay 1161d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao) 1162d71ae5a4SJacob Faibussowitsch { 1163a7e14dcfSSatish Balay TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1164a7e14dcfSSatish Balay 1165a7e14dcfSSatish Balay PetscFunctionBegin; 1166a7e14dcfSSatish Balay tao->ops->setup = TaoSetUp_POUNDERS; 1167a7e14dcfSSatish Balay tao->ops->solve = TaoSolve_POUNDERS; 1168a7e14dcfSSatish Balay tao->ops->view = TaoView_POUNDERS; 1169a7e14dcfSSatish Balay tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS; 1170a7e14dcfSSatish Balay tao->ops->destroy = TaoDestroy_POUNDERS; 1171a7e14dcfSSatish Balay 11724dfa11a4SJacob Faibussowitsch PetscCall(PetscNew(&mfqP)); 1173a7e14dcfSSatish Balay tao->data = (void *)mfqP; 1174606f75f6SBarry Smith 11756552cf8aSJason Sarich /* Override default settings (unless already changed) */ 1176606f75f6SBarry Smith PetscCall(TaoParametersInitialize(tao)); 1177606f75f6SBarry Smith PetscObjectParameterSetDefault(tao, max_it, 2000); 1178606f75f6SBarry Smith PetscObjectParameterSetDefault(tao, max_funcs, 4000); 1179606f75f6SBarry Smith 1180606f75f6SBarry Smith mfqP->npmax = PETSC_CURRENT; 1181ff2b74efSJason Sarich mfqP->delta0 = 0.1; 1182a7e14dcfSSatish Balay mfqP->delta = 0.1; 1183a7e14dcfSSatish Balay mfqP->deltamax = 1e3; 1184125ddc8aSJason Sarich mfqP->deltamin = 1e-6; 1185a8bd9cb3STodd Munson mfqP->c2 = 10.0; 1186a7e14dcfSSatish Balay mfqP->theta1 = 1e-5; 1187a7e14dcfSSatish Balay mfqP->theta2 = 1e-4; 1188a7e14dcfSSatish Balay mfqP->gamma0 = 0.5; 1189a7e14dcfSSatish Balay mfqP->gamma1 = 2.0; 1190a7e14dcfSSatish Balay mfqP->eta0 = 0.0; 1191a7e14dcfSSatish Balay mfqP->eta1 = 0.1; 1192f553c655STodd Munson mfqP->usegqt = PETSC_FALSE; 1193a7e14dcfSSatish Balay mfqP->gqt_rtol = 0.001; 1194a7e14dcfSSatish Balay mfqP->gqt_maxits = 50; 119583c8fe1dSLisandro Dalcin mfqP->workxvec = NULL; 11963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1197a7e14dcfSSatish Balay } 1198