1 #include <../src/tao/leastsquares/impls/pounders/pounders.h> 2 3 static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, void *ctx) 4 { 5 PetscFunctionBegin; 6 PetscFunctionReturn(PETSC_SUCCESS); 7 } 8 9 static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, void *ctx) 10 { 11 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx; 12 PetscReal d1, d2; 13 14 PetscFunctionBegin; 15 /* g = A*x (add b later)*/ 16 PetscCall(MatMult(mfqP->subH, x, g)); 17 18 /* f = 1/2 * x'*(Ax) + b'*x */ 19 PetscCall(VecDot(x, g, &d1)); 20 PetscCall(VecDot(mfqP->subb, x, &d2)); 21 *f = 0.5 * d1 + d2; 22 23 /* now g = g + b */ 24 PetscCall(VecAXPY(g, 1.0, mfqP->subb)); 25 PetscFunctionReturn(PETSC_SUCCESS); 26 } 27 28 static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum) 29 { 30 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 31 PetscInt i, row, col; 32 PetscReal fr, fc; 33 34 PetscFunctionBegin; 35 PetscCall(TaoComputeResidual(tao, x, F)); 36 if (tao->res_weights_v) { 37 PetscCall(VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F)); 38 PetscCall(VecDot(mfqP->workfvec, mfqP->workfvec, fsum)); 39 } else if (tao->res_weights_w) { 40 *fsum = 0; 41 for (i = 0; i < tao->res_weights_n; i++) { 42 row = tao->res_weights_rows[i]; 43 col = tao->res_weights_cols[i]; 44 PetscCall(VecGetValues(F, 1, &row, &fr)); 45 PetscCall(VecGetValues(F, 1, &col, &fc)); 46 *fsum += tao->res_weights_w[i] * fc * fr; 47 } 48 } else { 49 PetscCall(VecDot(F, F, fsum)); 50 } 51 PetscCall(PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum)); 52 PetscCheck(!PetscIsInfOrNanReal(*fsum), PETSC_COMM_SELF, PETSC_ERR_USER, "User provided compute function generated infinity or NaN"); 53 PetscFunctionReturn(PETSC_SUCCESS); 54 } 55 56 static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin) 57 { 58 #if defined(PETSC_USE_REAL_SINGLE) 59 PetscReal atol = 1.0e-5; 60 #else 61 PetscReal atol = 1.0e-10; 62 #endif 63 PetscInt info, its; 64 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 65 66 PetscFunctionBegin; 67 if (!mfqP->usegqt) { 68 PetscReal maxval; 69 PetscInt i, j; 70 71 PetscCall(VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 72 PetscCall(VecAssemblyBegin(mfqP->subb)); 73 PetscCall(VecAssemblyEnd(mfqP->subb)); 74 75 PetscCall(VecSet(mfqP->subx, 0.0)); 76 77 PetscCall(VecSet(mfqP->subndel, -1.0)); 78 PetscCall(VecSet(mfqP->subpdel, +1.0)); 79 80 /* Complete the lower triangle of the Hessian matrix */ 81 for (i = 0; i < mfqP->n; i++) { 82 for (j = i + 1; j < mfqP->n; j++) mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i]; 83 } 84 PetscCall(MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES)); 85 PetscCall(MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY)); 86 PetscCall(MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY)); 87 88 PetscCall(TaoResetStatistics(mfqP->subtao)); 89 /* PetscCall(TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_CURRENT)); */ 90 /* enforce bound constraints -- experimental */ 91 if (tao->XU && tao->XL) { 92 PetscCall(VecCopy(tao->XU, mfqP->subxu)); 93 PetscCall(VecAXPY(mfqP->subxu, -1.0, tao->solution)); 94 PetscCall(VecScale(mfqP->subxu, 1.0 / mfqP->delta)); 95 PetscCall(VecCopy(tao->XL, mfqP->subxl)); 96 PetscCall(VecAXPY(mfqP->subxl, -1.0, tao->solution)); 97 PetscCall(VecScale(mfqP->subxl, 1.0 / mfqP->delta)); 98 99 PetscCall(VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel)); 100 PetscCall(VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel)); 101 } else { 102 PetscCall(VecCopy(mfqP->subpdel, mfqP->subxu)); 103 PetscCall(VecCopy(mfqP->subndel, mfqP->subxl)); 104 } 105 /* Make sure xu > xl */ 106 PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 107 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 108 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 109 PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "upper bound < lower bound in subproblem"); 110 /* Make sure xu > tao->solution > xl */ 111 PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 112 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 113 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 114 PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess < lower bound in subproblem"); 115 116 PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 117 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 118 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 119 PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess > upper bound in subproblem"); 120 121 PetscCall(TaoSolve(mfqP->subtao)); 122 PetscCall(TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL)); 123 124 /* test bounds post-solution*/ 125 PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 126 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 127 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 128 if (maxval > 1e-5) { 129 PetscCall(PetscInfo(tao, "subproblem solution < lower bound\n")); 130 tao->reason = TAO_DIVERGED_TR_REDUCTION; 131 } 132 133 PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 134 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 135 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 136 if (maxval > 1e-5) { 137 PetscCall(PetscInfo(tao, "subproblem solution > upper bound\n")); 138 tao->reason = TAO_DIVERGED_TR_REDUCTION; 139 } 140 } else { 141 PetscCall(gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3)); 142 } 143 *qmin *= -1; 144 PetscFunctionReturn(PETSC_SUCCESS); 145 } 146 147 static PetscErrorCode pounders_update_res(Tao tao) 148 { 149 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 150 PetscInt i, row, col; 151 PetscBLASInt blasn, blasn2, blasm, ione = 1; 152 PetscReal zero = 0.0, one = 1.0, wii, factor; 153 154 PetscFunctionBegin; 155 PetscCall(PetscBLASIntCast(mfqP->n, &blasn)); 156 PetscCall(PetscBLASIntCast(mfqP->m, &blasm)); 157 PetscCall(PetscBLASIntCast(mfqP->n * mfqP->n, &blasn2)); 158 for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] = 0; 159 for (i = 0; i < mfqP->n * mfqP->n; i++) mfqP->Hres[i] = 0; 160 161 /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */ 162 if (tao->res_weights_v) { 163 /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */ 164 for (i = 0; i < mfqP->m; i++) { 165 PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &factor)); 166 factor = factor * mfqP->C[i]; 167 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione)); 168 } 169 170 /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 171 /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/ 172 for (i = 0; i < mfqP->m; i++) { 173 PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &wii)); 174 if (tao->niter > 1) { 175 factor = wii * mfqP->C[i]; 176 /* add wii * ci * Hi */ 177 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 178 } 179 /* add wii * gi * gi' */ 180 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn)); 181 } 182 } else if (tao->res_weights_w) { 183 /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */ 184 for (i = 0; i < tao->res_weights_n; i++) { 185 row = tao->res_weights_rows[i]; 186 col = tao->res_weights_cols[i]; 187 188 factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 189 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione)); 190 factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 191 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione)); 192 } 193 194 /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 195 /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 196 for (i = 0; i < tao->res_weights_n; i++) { 197 row = tao->res_weights_rows[i]; 198 col = tao->res_weights_cols[i]; 199 factor = tao->res_weights_w[i] / 2.0; 200 /* add wij * gi gj' + wij * gj gi' */ 201 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn)); 202 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn)); 203 } 204 if (tao->niter > 1) { 205 for (i = 0; i < tao->res_weights_n; i++) { 206 row = tao->res_weights_rows[i]; 207 col = tao->res_weights_cols[i]; 208 209 /* add wij*cj*Hi */ 210 factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 211 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione)); 212 213 /* add wij*ci*Hj */ 214 factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 215 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione)); 216 } 217 } 218 } else { 219 /* Default: Gres= sum_i[cigi] = G*c' */ 220 PetscCall(PetscInfo(tao, "Identity weights\n")); 221 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione)); 222 223 /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 224 /* Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)} */ 225 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn)); 226 227 /* sum(F(xkin,i)*H(:,:,i)) */ 228 if (tao->niter > 1) { 229 for (i = 0; i < mfqP->m; i++) { 230 factor = mfqP->C[i]; 231 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 232 } 233 } 234 } 235 PetscFunctionReturn(PETSC_SUCCESS); 236 } 237 238 static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi) 239 { 240 /* Phi = .5*[x(1)^2 sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */ 241 PetscInt i, j, k; 242 PetscReal sqrt2 = PetscSqrtReal(2.0); 243 244 PetscFunctionBegin; 245 j = 0; 246 for (i = 0; i < n; i++) { 247 phi[j] = 0.5 * x[i] * x[i]; 248 j++; 249 for (k = i + 1; k < n; k++) { 250 phi[j] = x[i] * x[k] / sqrt2; 251 j++; 252 } 253 } 254 PetscFunctionReturn(PETSC_SUCCESS); 255 } 256 257 static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP) 258 { 259 /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x' 260 that satisfies the interpolation conditions Q(X[:,j]) = f(j) 261 for j=1,...,m and with a Hessian matrix of least Frobenius norm */ 262 263 /* NB --we are ignoring c */ 264 PetscInt i, j, k, num, np = mfqP->nmodelpoints; 265 PetscReal one = 1.0, zero = 0.0, negone = -1.0; 266 PetscBLASInt blasnpmax, blasnplus1, blasnp, blasint, blasint2; 267 PetscBLASInt info, ione = 1; 268 PetscReal sqrt2 = PetscSqrtReal(2.0); 269 270 PetscFunctionBegin; 271 PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax)); 272 PetscCall(PetscBLASIntCast(mfqP->n + 1, &blasnplus1)); 273 PetscCall(PetscBLASIntCast(np, &blasnp)); 274 PetscCall(PetscBLASIntCast(mfqP->n * (mfqP->n + 1) / 2, &blasint)); 275 PetscCall(PetscBLASIntCast(np - mfqP->n - 1, &blasint2)); 276 for (i = 0; i < mfqP->n * mfqP->m; i++) mfqP->Gdel[i] = 0; 277 for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) mfqP->Hdel[i] = 0; 278 279 /* factor M */ 280 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info)); 281 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrf returned with value %" PetscBLASInt_FMT, info); 282 283 if (np == mfqP->n + 1) { 284 for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) mfqP->omega[i] = 0.0; 285 for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->beta[i] = 0.0; 286 } else { 287 /* Let Ltmp = (L'*L) */ 288 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint)); 289 290 /* factor Ltmp */ 291 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info)); 292 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrf returned with value %" PetscBLASInt_FMT, info); 293 } 294 295 for (k = 0; k < mfqP->m; k++) { 296 if (np != mfqP->n + 1) { 297 /* Solve L'*L*Omega = Z' * RESk*/ 298 PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione)); 299 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info)); 300 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrs returned with value %" PetscBLASInt_FMT, info); 301 302 /* Beta = L*Omega */ 303 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione)); 304 } 305 306 /* solve M'*Alpha = RESk - N'*Beta */ 307 PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione)); 308 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info)); 309 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrs returned with value %" PetscBLASInt_FMT, info); 310 311 /* Gdel(:,k) = Alpha(2:n+1) */ 312 for (i = 0; i < mfqP->n; i++) mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1]; 313 314 /* Set Hdels */ 315 num = 0; 316 for (i = 0; i < mfqP->n; i++) { 317 /* H[i,i,k] = Beta(num) */ 318 mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num]; 319 num++; 320 for (j = i + 1; j < mfqP->n; j++) { 321 /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */ 322 mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 323 mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 324 num++; 325 } 326 } 327 } 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 static PetscErrorCode morepoints(TAO_POUNDERS *mfqP) 332 { 333 /* Assumes mfqP->model_indices[0] is minimum index 334 Finishes adding points to mfqP->model_indices (up to npmax) 335 Computes L,Z,M,N 336 np is actual number of points in model (should equal npmax?) */ 337 PetscInt point, i, j, offset; 338 PetscInt reject; 339 PetscBLASInt blasn, blasnpmax, blasnplus1, info, blasnmax, blasint, blasint2, blasnp, blasmaxmn; 340 const PetscReal *x; 341 PetscReal normd; 342 343 PetscFunctionBegin; 344 PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax)); 345 PetscCall(PetscBLASIntCast(mfqP->n, &blasn)); 346 PetscCall(PetscBLASIntCast(mfqP->nmax, &blasnmax)); 347 PetscCall(PetscBLASIntCast(mfqP->n + 1, &blasnplus1)); 348 PetscCall(PetscBLASIntCast(mfqP->n, &blasnp)); 349 /* Initialize M,N */ 350 for (i = 0; i < mfqP->n + 1; i++) { 351 PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 352 mfqP->M[(mfqP->n + 1) * i] = 1.0; 353 for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 354 PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 355 PetscCall(phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i])); 356 } 357 358 /* Now we add points until we have npmax starting with the most recent ones */ 359 point = mfqP->nHist - 1; 360 mfqP->nmodelpoints = mfqP->n + 1; 361 while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) { 362 /* Reject any points already in the model */ 363 reject = 0; 364 for (j = 0; j < mfqP->n + 1; j++) { 365 if (point == mfqP->model_indices[j]) { 366 reject = 1; 367 break; 368 } 369 } 370 371 /* Reject if norm(d) >c2 */ 372 if (!reject) { 373 PetscCall(VecCopy(mfqP->Xhist[point], mfqP->workxvec)); 374 PetscCall(VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex])); 375 PetscCall(VecNorm(mfqP->workxvec, NORM_2, &normd)); 376 normd /= mfqP->delta; 377 if (normd > mfqP->c2) reject = 1; 378 } 379 if (reject) { 380 point--; 381 continue; 382 } 383 384 PetscCall(VecGetArrayRead(mfqP->Xhist[point], &x)); 385 mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0; 386 for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 387 PetscCall(VecRestoreArrayRead(mfqP->Xhist[point], &x)); 388 PetscCall(phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)])); 389 390 /* Update QR factorization */ 391 /* Copy M' to Q_tmp */ 392 for (i = 0; i < mfqP->n + 1; i++) { 393 for (j = 0; j < mfqP->npmax; j++) mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j]; 394 } 395 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints + 1, &blasnp)); 396 /* Q_tmp,R = qr(M') */ 397 PetscCall(PetscBLASIntCast(PetscMax(mfqP->m, mfqP->n + 1), &blasmaxmn)); 398 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info)); 399 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine geqrf returned with value %" PetscBLASInt_FMT, info); 400 401 /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */ 402 /* L = N*Qtmp */ 403 PetscCall(PetscBLASIntCast(mfqP->n * (mfqP->n + 1) / 2, &blasint2)); 404 /* Copy N to L_tmp */ 405 for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) mfqP->L_tmp[i] = mfqP->N[i]; 406 /* Copy L_save to L_tmp */ 407 408 /* L_tmp = N*Qtmp' */ 409 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info)); 410 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 411 412 /* Copy L_tmp to L_save */ 413 for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L_save[i] = mfqP->L_tmp[i]; 414 415 /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */ 416 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints - mfqP->n, &blasint)); 417 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 418 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info)); 419 PetscCall(PetscFPTrapPop()); 420 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine gesvd returned with value %" PetscBLASInt_FMT, info); 421 422 if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) { 423 /* accept point */ 424 mfqP->model_indices[mfqP->nmodelpoints] = point; 425 /* Copy Q_tmp to Q */ 426 for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q[i] = mfqP->Q_tmp[i]; 427 for (i = 0; i < mfqP->npmax; i++) mfqP->tau[i] = mfqP->tau_tmp[i]; 428 mfqP->nmodelpoints++; 429 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blasnp)); 430 431 /* Copy L_save to L */ 432 for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = mfqP->L_save[i]; 433 } 434 point--; 435 } 436 437 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blasnp)); 438 /* Copy Q(:,n+2:np) to Z */ 439 /* First set Q_tmp to I */ 440 for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q_tmp[i] = 0.0; 441 for (i = 0; i < mfqP->npmax; i++) mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0; 442 443 /* Q_tmp = I * Q */ 444 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 445 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 446 447 /* Copy Q_tmp(:,n+2:np) to Z) */ 448 offset = mfqP->npmax * (mfqP->n + 1); 449 for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Z[i - offset] = mfqP->Q_tmp[i]; 450 451 if (mfqP->nmodelpoints == mfqP->n + 1) { 452 /* Set L to I_{n+1} */ 453 for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = 0.0; 454 for (i = 0; i < mfqP->n; i++) mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0; 455 } 456 PetscFunctionReturn(PETSC_SUCCESS); 457 } 458 459 /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */ 460 static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index) 461 { 462 PetscFunctionBegin; 463 /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/ 464 PetscCall(VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist])); 465 PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES)); 466 PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 467 PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 468 PetscCall(VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex])); 469 470 /* Project into feasible region */ 471 if (tao->XU && tao->XL) PetscCall(VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist])); 472 473 /* Compute value of new vector */ 474 PetscCall(VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist])); 475 CHKMEMQ; 476 PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 477 478 /* Add new vector to model */ 479 mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist; 480 mfqP->nmodelpoints++; 481 mfqP->nHist++; 482 PetscFunctionReturn(PETSC_SUCCESS); 483 } 484 485 static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints) 486 { 487 /* modeld = Q(:,np+1:n)' */ 488 PetscInt i, j, minindex = 0; 489 PetscReal dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY; 490 PetscBLASInt blasn, blasnpmax, blask, info; 491 PetscBLASInt blas1 = 1, blasnmax; 492 493 PetscFunctionBegin; 494 PetscCall(PetscBLASIntCast(mfqP->n, &blasn)); 495 PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax)); 496 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask)); 497 PetscCall(PetscBLASIntCast(mfqP->nmax, &blasnmax)); 498 499 /* Qtmp = I(n x n) */ 500 for (i = 0; i < mfqP->n; i++) { 501 for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0; 502 } 503 for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0; 504 505 /* Qtmp = Q * I */ 506 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 507 508 for (i = mfqP->nmodelpoints; i < mfqP->n; i++) { 509 PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1)); 510 if (dp > 0.0) { /* Model says use the other direction! */ 511 for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i * mfqP->npmax + j] *= -1; 512 } 513 /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */ 514 for (j = 0; j < mfqP->n; j++) mfqP->work2[j] = mfqP->Gres[j]; 515 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1)); 516 PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1)); 517 if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) { 518 minindex = i; 519 minvalue = mfqP->work[i]; 520 } 521 if (addallpoints != 0) PetscCall(addpoint(tao, mfqP, i)); 522 } 523 if (!addallpoints) PetscCall(addpoint(tao, mfqP, minindex)); 524 PetscFunctionReturn(PETSC_SUCCESS); 525 } 526 527 static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c) 528 { 529 PetscInt i, j; 530 PetscBLASInt blasm, blasj, blask, blasn, ione = 1, info; 531 PetscBLASInt blasnpmax, blasmaxmn; 532 PetscReal proj, normd; 533 const PetscReal *x; 534 535 PetscFunctionBegin; 536 PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax)); 537 PetscCall(PetscBLASIntCast(mfqP->m, &blasm)); 538 PetscCall(PetscBLASIntCast(mfqP->n, &blasn)); 539 for (i = mfqP->nHist - 1; i >= 0; i--) { 540 PetscCall(VecGetArrayRead(mfqP->Xhist[i], &x)); 541 for (j = 0; j < mfqP->n; j++) mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta; 542 PetscCall(VecRestoreArrayRead(mfqP->Xhist[i], &x)); 543 PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione)); 544 PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione)); 545 if (normd <= c) { 546 PetscCall(PetscBLASIntCast(PetscMax(mfqP->n - mfqP->nmodelpoints, 0), &blasj)); 547 if (!mfqP->q_is_I) { 548 /* project D onto null */ 549 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask)); 550 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info)); 551 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "ormqr returned value %" PetscBLASInt_FMT, info); 552 } 553 PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione)); 554 555 if (proj >= mfqP->theta1) { /* add this index to model */ 556 mfqP->model_indices[mfqP->nmodelpoints] = i; 557 mfqP->nmodelpoints++; 558 PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione)); 559 PetscCall(PetscBLASIntCast(mfqP->npmax * (mfqP->nmodelpoints), &blask)); 560 PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione)); 561 PetscCall(PetscBLASIntCast(mfqP->nmodelpoints, &blask)); 562 PetscCall(PetscBLASIntCast(PetscMax(mfqP->m, mfqP->n), &blasmaxmn)); 563 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info)); 564 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "geqrf returned value %" PetscBLASInt_FMT, info); 565 mfqP->q_is_I = 0; 566 } 567 if (mfqP->nmodelpoints == mfqP->n) break; 568 } 569 } 570 PetscFunctionReturn(PETSC_SUCCESS); 571 } 572 573 static PetscErrorCode TaoSolve_POUNDERS(Tao tao) 574 { 575 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 576 PetscInt i, ii, j, k, l; 577 PetscReal step = 1.0; 578 PetscInt low, high; 579 PetscReal minnorm; 580 PetscReal *x, *f; 581 const PetscReal *xmint, *fmin; 582 PetscReal deltaold; 583 PetscReal gnorm; 584 PetscBLASInt info, ione = 1, iblas; 585 PetscBool valid, same; 586 PetscReal mdec, rho, normxsp; 587 PetscReal one = 1.0, zero = 0.0, ratio; 588 PetscBLASInt blasm, blasn, blasncopy, blasnpmax; 589 static PetscBool set = PETSC_FALSE; 590 591 /* n = # of parameters 592 m = dimension (components) of function */ 593 PetscFunctionBegin; 594 PetscCall(PetscCitationsRegister("@article{UNEDF0,\n" 595 "title = {Nuclear energy density optimization},\n" 596 "author = {Kortelainen, M. and Lesinski, T. and Mor\'e, J. and Nazarewicz, W.\n" 597 " and Sarich, J. and Schunck, N. and Stoitsov, M. V. and Wild, S. },\n" 598 "journal = {Phys. Rev. C},\n" 599 "volume = {82},\n" 600 "number = {2},\n" 601 "pages = {024313},\n" 602 "numpages = {18},\n" 603 "year = {2010},\n" 604 "month = {Aug},\n" 605 "doi = {10.1103/PhysRevC.82.024313}\n}\n", 606 &set)); 607 tao->niter = 0; 608 if (tao->XL && tao->XU) { 609 /* Check x0 <= XU */ 610 PetscReal val; 611 612 PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 613 PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 614 PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 615 PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 > upper bound"); 616 617 /* Check x0 >= xl */ 618 PetscCall(VecCopy(tao->XL, mfqP->Xhist[0])); 619 PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->solution)); 620 PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 621 PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 < lower bound"); 622 623 /* Check x0 + delta < XU -- should be able to get around this eventually */ 624 625 PetscCall(VecSet(mfqP->Xhist[0], mfqP->delta)); 626 PetscCall(VecAXPY(mfqP->Xhist[0], 1.0, tao->solution)); 627 PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 628 PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 629 PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 + delta > upper bound"); 630 } 631 632 PetscCall(PetscBLASIntCast(mfqP->m, &blasm)); 633 PetscCall(PetscBLASIntCast(mfqP->n, &blasn)); 634 PetscCall(PetscBLASIntCast(mfqP->npmax, &blasnpmax)); 635 for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0; 636 637 PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 638 639 /* This provides enough information to approximate the gradient of the objective */ 640 /* using a forward difference scheme. */ 641 642 PetscCall(PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta)); 643 PetscCall(pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0])); 644 mfqP->minindex = 0; 645 minnorm = mfqP->Fres[0]; 646 647 PetscCall(VecGetOwnershipRange(mfqP->Xhist[0], &low, &high)); 648 for (i = 1; i < mfqP->n + 1; ++i) { 649 PetscCall(VecCopy(mfqP->Xhist[0], mfqP->Xhist[i])); 650 651 if (i - 1 >= low && i - 1 < high) { 652 PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 653 x[i - 1 - low] += mfqP->delta; 654 PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 655 } 656 CHKMEMQ; 657 PetscCall(pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i])); 658 if (mfqP->Fres[i] < minnorm) { 659 mfqP->minindex = i; 660 minnorm = mfqP->Fres[i]; 661 } 662 } 663 PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 664 PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 665 PetscCall(PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm)); 666 667 /* Gather mpi vecs to one big local vec */ 668 669 /* Begin serial code */ 670 671 /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 672 /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 673 /* (Column oriented for blas calls) */ 674 ii = 0; 675 676 PetscCall(PetscInfo(tao, "Build matrix: %d\n", mfqP->size)); 677 if (1 == mfqP->size) { 678 PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 679 for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 680 PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 681 PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 682 for (i = 0; i < mfqP->n + 1; i++) { 683 if (i == mfqP->minindex) continue; 684 685 PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 686 for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 687 PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 688 689 PetscCall(VecGetArray(mfqP->Fhist[i], &f)); 690 for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; 691 PetscCall(VecRestoreArray(mfqP->Fhist[i], &f)); 692 693 mfqP->model_indices[ii++] = i; 694 } 695 for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j]; 696 PetscCall(VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 697 } else { 698 PetscCall(VecSet(mfqP->localxmin, 0)); 699 PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 700 PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 701 702 PetscCall(VecGetArrayRead(mfqP->localxmin, &xmint)); 703 for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 704 PetscCall(VecRestoreArrayRead(mfqP->localxmin, &xmint)); 705 706 PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 707 PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 708 PetscCall(VecGetArrayRead(mfqP->localfmin, &fmin)); 709 for (i = 0; i < mfqP->n + 1; i++) { 710 if (i == mfqP->minindex) continue; 711 712 PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 713 PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 714 PetscCall(VecGetArray(mfqP->localx, &x)); 715 for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 716 PetscCall(VecRestoreArray(mfqP->localx, &x)); 717 718 PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 719 PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 720 PetscCall(VecGetArray(mfqP->localf, &f)); 721 for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; 722 PetscCall(VecRestoreArray(mfqP->localf, &f)); 723 724 mfqP->model_indices[ii++] = i; 725 } 726 for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j]; 727 PetscCall(VecRestoreArrayRead(mfqP->localfmin, &fmin)); 728 } 729 730 /* Determine the initial quadratic models */ 731 /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */ 732 /* D (nxn) Fdiff (nxm) => G (nxm) */ 733 blasncopy = blasn; 734 PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info)); 735 PetscCall(PetscInfo(tao, "Linear solve return: %" PetscBLASInt_FMT "\n", info)); 736 737 PetscCall(pounders_update_res(tao)); 738 739 valid = PETSC_TRUE; 740 741 PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 742 PetscCall(VecAssemblyBegin(tao->gradient)); 743 PetscCall(VecAssemblyEnd(tao->gradient)); 744 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 745 gnorm *= mfqP->delta; 746 PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 747 748 tao->reason = TAO_CONTINUE_ITERATING; 749 PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 750 PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 751 PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 752 753 mfqP->nHist = mfqP->n + 1; 754 mfqP->nmodelpoints = mfqP->n + 1; 755 PetscCall(PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm)); 756 757 while (tao->reason == TAO_CONTINUE_ITERATING) { 758 PetscReal gnm = 1e-4; 759 /* Call general purpose update function */ 760 PetscTryTypeMethod(tao, update, tao->niter, tao->user_update); 761 tao->niter++; 762 /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */ 763 PetscCall(gqtwrap(tao, &gnm, &mdec)); 764 /* Evaluate the function at the new point */ 765 766 for (i = 0; i < mfqP->n; i++) mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i]; 767 PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist])); 768 PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist])); 769 PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES)); 770 PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 771 PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 772 773 PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 774 775 rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec; 776 mfqP->nHist++; 777 778 /* Update the center */ 779 if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) { 780 /* Update model to reflect new base point */ 781 for (i = 0; i < mfqP->n; i++) mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta; 782 for (j = 0; j < mfqP->m; j++) { 783 /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work'; 784 G(:,j) = G(:,j) + H(:,:,j)*work' */ 785 for (k = 0; k < mfqP->n; k++) { 786 mfqP->work2[k] = 0.0; 787 for (l = 0; l < mfqP->n; l++) mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l]; 788 } 789 for (i = 0; i < mfqP->n; i++) { 790 mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]); 791 mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i]; 792 } 793 } 794 /* Cres += work*Gres + .5*work*Hres*work'; 795 Gres += Hres*work'; */ 796 797 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione)); 798 for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] += mfqP->work2[i]; 799 mfqP->minindex = mfqP->nHist - 1; 800 minnorm = mfqP->Fres[mfqP->minindex]; 801 PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 802 /* Change current center */ 803 PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 804 for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 805 PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 806 } 807 808 /* Evaluate at a model-improving point if necessary */ 809 if (valid == PETSC_FALSE) { 810 mfqP->q_is_I = 1; 811 mfqP->nmodelpoints = 0; 812 PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 813 if (mfqP->nmodelpoints < mfqP->n) { 814 PetscCall(PetscInfo(tao, "Model not valid -- model-improving\n")); 815 PetscCall(modelimprove(tao, mfqP, 1)); 816 } 817 } 818 819 /* Update the trust region radius */ 820 deltaold = mfqP->delta; 821 normxsp = 0; 822 for (i = 0; i < mfqP->n; i++) normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i]; 823 normxsp = PetscSqrtReal(normxsp); 824 if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) { 825 mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax); 826 } else if (valid == PETSC_TRUE) { 827 mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin); 828 } 829 830 /* Compute the next interpolation set */ 831 mfqP->q_is_I = 1; 832 mfqP->nmodelpoints = 0; 833 PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1)); 834 PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 835 if (mfqP->nmodelpoints == mfqP->n) { 836 valid = PETSC_TRUE; 837 } else { 838 valid = PETSC_FALSE; 839 PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2)); 840 PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c2)); 841 if (mfqP->n > mfqP->nmodelpoints) { 842 PetscCall(PetscInfo(tao, "Model not valid -- adding geometry points\n")); 843 PetscCall(modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints)); 844 } 845 } 846 for (i = mfqP->nmodelpoints; i > 0; i--) mfqP->model_indices[i] = mfqP->model_indices[i - 1]; 847 mfqP->nmodelpoints++; 848 mfqP->model_indices[0] = mfqP->minindex; 849 PetscCall(morepoints(mfqP)); 850 for (i = 0; i < mfqP->nmodelpoints; i++) { 851 PetscCall(VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 852 for (j = 0; j < mfqP->n; j++) mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold; 853 PetscCall(VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 854 PetscCall(VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 855 for (j = 0; j < mfqP->m; j++) { 856 for (k = 0; k < mfqP->n; k++) { 857 mfqP->work[k] = 0.0; 858 for (l = 0; l < mfqP->n; l++) mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l]; 859 } 860 PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]); 861 } 862 PetscCall(VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 863 } 864 865 /* Update the quadratic model */ 866 PetscCall(PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints)); 867 PetscCall(getquadpounders(mfqP)); 868 PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 869 PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione)); 870 /* G = G*(delta/deltaold) + Gdel */ 871 ratio = mfqP->delta / deltaold; 872 iblas = blasm * blasn; 873 PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione)); 874 PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione)); 875 /* H = H*(delta/deltaold)^2 + Hdel */ 876 iblas = blasm * blasn * blasn; 877 ratio *= ratio; 878 PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione)); 879 PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione)); 880 881 /* Get residuals */ 882 PetscCall(pounders_update_res(tao)); 883 884 /* Export solution and gradient residual to TAO */ 885 PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 886 PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 887 PetscCall(VecAssemblyBegin(tao->gradient)); 888 PetscCall(VecAssemblyEnd(tao->gradient)); 889 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 890 gnorm *= mfqP->delta; 891 /* final criticality test */ 892 PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 893 PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 894 PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 895 /* test for repeated model */ 896 if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) { 897 same = PETSC_TRUE; 898 } else { 899 same = PETSC_FALSE; 900 } 901 for (i = 0; i < mfqP->nmodelpoints; i++) { 902 if (same) { 903 if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) { 904 same = PETSC_TRUE; 905 } else { 906 same = PETSC_FALSE; 907 } 908 } 909 mfqP->last_model_indices[i] = mfqP->model_indices[i]; 910 } 911 mfqP->last_nmodelpoints = mfqP->nmodelpoints; 912 if (same && mfqP->delta == deltaold) { 913 PetscCall(PetscInfo(tao, "Identical model used in successive iterations\n")); 914 tao->reason = TAO_CONVERGED_STEPTOL; 915 } 916 } 917 PetscFunctionReturn(PETSC_SUCCESS); 918 } 919 920 static PetscErrorCode TaoSetUp_POUNDERS(Tao tao) 921 { 922 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 923 PetscInt i, j; 924 IS isfloc, isfglob, isxloc, isxglob; 925 926 PetscFunctionBegin; 927 if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient)); 928 if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection)); 929 PetscCall(VecGetSize(tao->solution, &mfqP->n)); 930 PetscCall(VecGetSize(tao->ls_res, &mfqP->m)); 931 mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n); 932 if (mfqP->npmax == PETSC_CURRENT) mfqP->npmax = 2 * mfqP->n + 1; 933 mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax); 934 mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2); 935 936 PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist)); 937 PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist)); 938 for (i = 0; i < mfqP->n + 1; i++) { 939 PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[i])); 940 PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[i])); 941 } 942 PetscCall(VecDuplicate(tao->solution, &mfqP->workxvec)); 943 PetscCall(VecDuplicate(tao->ls_res, &mfqP->workfvec)); 944 mfqP->nHist = 0; 945 946 PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres)); 947 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES)); 948 PetscCall(PetscMalloc1(mfqP->n, &mfqP->work)); 949 PetscCall(PetscMalloc1(mfqP->n, &mfqP->work2)); 950 PetscCall(PetscMalloc1(mfqP->n, &mfqP->work3)); 951 PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork)); 952 PetscCall(PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega)); 953 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta)); 954 PetscCall(PetscMalloc1(mfqP->n + 1, &mfqP->alpha)); 955 956 PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H)); 957 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q)); 958 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp)); 959 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L)); 960 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp)); 961 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save)); 962 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N)); 963 PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M)); 964 PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z)); 965 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau)); 966 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp)); 967 mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2); 968 PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork)); 969 PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork)); 970 PetscCall(PetscMalloc1(mfqP->n, &mfqP->xmin)); 971 PetscCall(PetscMalloc1(mfqP->m, &mfqP->C)); 972 PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff)); 973 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp)); 974 PetscCall(PetscMalloc1(mfqP->n, &mfqP->Gres)); 975 PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres)); 976 PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints)); 977 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->model_indices)); 978 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices)); 979 PetscCall(PetscMalloc1(mfqP->n, &mfqP->Xsubproblem)); 980 PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel)); 981 PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel)); 982 PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices)); 983 PetscCall(PetscMalloc1(mfqP->n, &mfqP->iwork)); 984 PetscCall(PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w)); 985 for (i = 0; i < mfqP->m; i++) { 986 for (j = 0; j < mfqP->m; j++) { 987 if (i == j) { 988 mfqP->w[i + mfqP->m * j] = 1.0; 989 } else { 990 mfqP->w[i + mfqP->m * j] = 0.0; 991 } 992 } 993 } 994 for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) mfqP->indices[i] = i; 995 PetscCallMPI(MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size)); 996 if (mfqP->size > 1) { 997 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx)); 998 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin)); 999 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf)); 1000 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin)); 1001 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc)); 1002 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob)); 1003 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc)); 1004 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob)); 1005 1006 PetscCall(VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx)); 1007 PetscCall(VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf)); 1008 1009 PetscCall(ISDestroy(&isxloc)); 1010 PetscCall(ISDestroy(&isxglob)); 1011 PetscCall(ISDestroy(&isfloc)); 1012 PetscCall(ISDestroy(&isfglob)); 1013 } 1014 1015 if (!mfqP->usegqt) { 1016 KSP ksp; 1017 PC pc; 1018 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx)); 1019 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl)); 1020 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subb)); 1021 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subxu)); 1022 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subpdel)); 1023 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subndel)); 1024 PetscCall(TaoCreate(PETSC_COMM_SELF, &mfqP->subtao)); 1025 PetscCall(PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1)); 1026 PetscCall(TaoSetType(mfqP->subtao, TAOBNTR)); 1027 PetscCall(TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_")); 1028 PetscCall(TaoSetSolution(mfqP->subtao, mfqP->subx)); 1029 PetscCall(TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP)); 1030 PetscCall(TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits)); 1031 PetscCall(TaoSetFromOptions(mfqP->subtao)); 1032 PetscCall(TaoGetKSP(mfqP->subtao, &ksp)); 1033 if (ksp) { 1034 PetscCall(KSPGetPC(ksp, &pc)); 1035 PetscCall(PCSetType(pc, PCNONE)); 1036 } 1037 PetscCall(TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu)); 1038 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH)); 1039 PetscCall(TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP)); 1040 } 1041 PetscFunctionReturn(PETSC_SUCCESS); 1042 } 1043 1044 static PetscErrorCode TaoDestroy_POUNDERS(Tao tao) 1045 { 1046 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1047 PetscInt i; 1048 1049 PetscFunctionBegin; 1050 if (!mfqP->usegqt) { 1051 PetscCall(TaoDestroy(&mfqP->subtao)); 1052 PetscCall(VecDestroy(&mfqP->subx)); 1053 PetscCall(VecDestroy(&mfqP->subxl)); 1054 PetscCall(VecDestroy(&mfqP->subxu)); 1055 PetscCall(VecDestroy(&mfqP->subb)); 1056 PetscCall(VecDestroy(&mfqP->subpdel)); 1057 PetscCall(VecDestroy(&mfqP->subndel)); 1058 PetscCall(MatDestroy(&mfqP->subH)); 1059 } 1060 PetscCall(PetscFree(mfqP->Fres)); 1061 PetscCall(PetscFree(mfqP->RES)); 1062 PetscCall(PetscFree(mfqP->work)); 1063 PetscCall(PetscFree(mfqP->work2)); 1064 PetscCall(PetscFree(mfqP->work3)); 1065 PetscCall(PetscFree(mfqP->mwork)); 1066 PetscCall(PetscFree(mfqP->omega)); 1067 PetscCall(PetscFree(mfqP->beta)); 1068 PetscCall(PetscFree(mfqP->alpha)); 1069 PetscCall(PetscFree(mfqP->H)); 1070 PetscCall(PetscFree(mfqP->Q)); 1071 PetscCall(PetscFree(mfqP->Q_tmp)); 1072 PetscCall(PetscFree(mfqP->L)); 1073 PetscCall(PetscFree(mfqP->L_tmp)); 1074 PetscCall(PetscFree(mfqP->L_save)); 1075 PetscCall(PetscFree(mfqP->N)); 1076 PetscCall(PetscFree(mfqP->M)); 1077 PetscCall(PetscFree(mfqP->Z)); 1078 PetscCall(PetscFree(mfqP->tau)); 1079 PetscCall(PetscFree(mfqP->tau_tmp)); 1080 PetscCall(PetscFree(mfqP->npmaxwork)); 1081 PetscCall(PetscFree(mfqP->npmaxiwork)); 1082 PetscCall(PetscFree(mfqP->xmin)); 1083 PetscCall(PetscFree(mfqP->C)); 1084 PetscCall(PetscFree(mfqP->Fdiff)); 1085 PetscCall(PetscFree(mfqP->Disp)); 1086 PetscCall(PetscFree(mfqP->Gres)); 1087 PetscCall(PetscFree(mfqP->Hres)); 1088 PetscCall(PetscFree(mfqP->Gpoints)); 1089 PetscCall(PetscFree(mfqP->model_indices)); 1090 PetscCall(PetscFree(mfqP->last_model_indices)); 1091 PetscCall(PetscFree(mfqP->Xsubproblem)); 1092 PetscCall(PetscFree(mfqP->Gdel)); 1093 PetscCall(PetscFree(mfqP->Hdel)); 1094 PetscCall(PetscFree(mfqP->indices)); 1095 PetscCall(PetscFree(mfqP->iwork)); 1096 PetscCall(PetscFree(mfqP->w)); 1097 for (i = 0; i < mfqP->nHist; i++) { 1098 PetscCall(VecDestroy(&mfqP->Xhist[i])); 1099 PetscCall(VecDestroy(&mfqP->Fhist[i])); 1100 } 1101 PetscCall(VecDestroy(&mfqP->workxvec)); 1102 PetscCall(VecDestroy(&mfqP->workfvec)); 1103 PetscCall(PetscFree(mfqP->Xhist)); 1104 PetscCall(PetscFree(mfqP->Fhist)); 1105 1106 if (mfqP->size > 1) { 1107 PetscCall(VecDestroy(&mfqP->localx)); 1108 PetscCall(VecDestroy(&mfqP->localxmin)); 1109 PetscCall(VecDestroy(&mfqP->localf)); 1110 PetscCall(VecDestroy(&mfqP->localfmin)); 1111 } 1112 PetscCall(PetscFree(tao->data)); 1113 PetscFunctionReturn(PETSC_SUCCESS); 1114 } 1115 1116 static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems PetscOptionsObject) 1117 { 1118 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1119 1120 PetscFunctionBegin; 1121 PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization"); 1122 PetscCall(PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL)); 1123 mfqP->delta = mfqP->delta0; 1124 PetscCall(PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL)); 1125 PetscCall(PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL)); 1126 PetscOptionsHeadEnd(); 1127 PetscFunctionReturn(PETSC_SUCCESS); 1128 } 1129 1130 static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer) 1131 { 1132 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1133 PetscBool isascii; 1134 1135 PetscFunctionBegin; 1136 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1137 if (isascii) { 1138 PetscCall(PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0)); 1139 PetscCall(PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta)); 1140 PetscCall(PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints)); 1141 if (mfqP->usegqt) { 1142 PetscCall(PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n")); 1143 } else { 1144 PetscCall(TaoView(mfqP->subtao, viewer)); 1145 } 1146 } 1147 PetscFunctionReturn(PETSC_SUCCESS); 1148 } 1149 /*MC 1150 TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares 1151 1152 Options Database Keys: 1153 + -tao_pounders_delta - initial step length 1154 . -tao_pounders_npmax - maximum number of points in model 1155 - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON 1156 1157 Level: beginner 1158 1159 M*/ 1160 1161 PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao) 1162 { 1163 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1164 1165 PetscFunctionBegin; 1166 tao->ops->setup = TaoSetUp_POUNDERS; 1167 tao->ops->solve = TaoSolve_POUNDERS; 1168 tao->ops->view = TaoView_POUNDERS; 1169 tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS; 1170 tao->ops->destroy = TaoDestroy_POUNDERS; 1171 1172 PetscCall(PetscNew(&mfqP)); 1173 tao->data = (void *)mfqP; 1174 1175 /* Override default settings (unless already changed) */ 1176 PetscCall(TaoParametersInitialize(tao)); 1177 PetscObjectParameterSetDefault(tao, max_it, 2000); 1178 PetscObjectParameterSetDefault(tao, max_funcs, 4000); 1179 1180 mfqP->npmax = PETSC_CURRENT; 1181 mfqP->delta0 = 0.1; 1182 mfqP->delta = 0.1; 1183 mfqP->deltamax = 1e3; 1184 mfqP->deltamin = 1e-6; 1185 mfqP->c2 = 10.0; 1186 mfqP->theta1 = 1e-5; 1187 mfqP->theta2 = 1e-4; 1188 mfqP->gamma0 = 0.5; 1189 mfqP->gamma1 = 2.0; 1190 mfqP->eta0 = 0.0; 1191 mfqP->eta1 = 0.1; 1192 mfqP->usegqt = PETSC_FALSE; 1193 mfqP->gqt_rtol = 0.001; 1194 mfqP->gqt_maxits = 50; 1195 mfqP->workxvec = NULL; 1196 PetscFunctionReturn(PETSC_SUCCESS); 1197 } 1198