1 #include <../src/tao/leastsquares/impls/pounders/pounders.h> 2 3 static PetscErrorCode pounders_h(Tao subtao, Vec v, Mat H, Mat Hpre, void *ctx) 4 { 5 PetscFunctionBegin; 6 PetscFunctionReturn(PETSC_SUCCESS); 7 } 8 9 static PetscErrorCode pounders_fg(Tao subtao, Vec x, PetscReal *f, Vec g, void *ctx) 10 { 11 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)ctx; 12 PetscReal d1, d2; 13 14 PetscFunctionBegin; 15 /* g = A*x (add b later)*/ 16 PetscCall(MatMult(mfqP->subH, x, g)); 17 18 /* f = 1/2 * x'*(Ax) + b'*x */ 19 PetscCall(VecDot(x, g, &d1)); 20 PetscCall(VecDot(mfqP->subb, x, &d2)); 21 *f = 0.5 * d1 + d2; 22 23 /* now g = g + b */ 24 PetscCall(VecAXPY(g, 1.0, mfqP->subb)); 25 PetscFunctionReturn(PETSC_SUCCESS); 26 } 27 28 static PetscErrorCode pounders_feval(Tao tao, Vec x, Vec F, PetscReal *fsum) 29 { 30 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 31 PetscInt i, row, col; 32 PetscReal fr, fc; 33 34 PetscFunctionBegin; 35 PetscCall(TaoComputeResidual(tao, x, F)); 36 if (tao->res_weights_v) { 37 PetscCall(VecPointwiseMult(mfqP->workfvec, tao->res_weights_v, F)); 38 PetscCall(VecDot(mfqP->workfvec, mfqP->workfvec, fsum)); 39 } else if (tao->res_weights_w) { 40 *fsum = 0; 41 for (i = 0; i < tao->res_weights_n; i++) { 42 row = tao->res_weights_rows[i]; 43 col = tao->res_weights_cols[i]; 44 PetscCall(VecGetValues(F, 1, &row, &fr)); 45 PetscCall(VecGetValues(F, 1, &col, &fc)); 46 *fsum += tao->res_weights_w[i] * fc * fr; 47 } 48 } else { 49 PetscCall(VecDot(F, F, fsum)); 50 } 51 PetscCall(PetscInfo(tao, "Least-squares residual norm: %20.19e\n", (double)*fsum)); 52 PetscCheck(!PetscIsInfOrNanReal(*fsum), PETSC_COMM_SELF, PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 53 PetscFunctionReturn(PETSC_SUCCESS); 54 } 55 56 static PetscErrorCode gqtwrap(Tao tao, PetscReal *gnorm, PetscReal *qmin) 57 { 58 #if defined(PETSC_USE_REAL_SINGLE) 59 PetscReal atol = 1.0e-5; 60 #else 61 PetscReal atol = 1.0e-10; 62 #endif 63 PetscInt info, its; 64 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 65 66 PetscFunctionBegin; 67 if (!mfqP->usegqt) { 68 PetscReal maxval; 69 PetscInt i, j; 70 71 PetscCall(VecSetValues(mfqP->subb, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 72 PetscCall(VecAssemblyBegin(mfqP->subb)); 73 PetscCall(VecAssemblyEnd(mfqP->subb)); 74 75 PetscCall(VecSet(mfqP->subx, 0.0)); 76 77 PetscCall(VecSet(mfqP->subndel, -1.0)); 78 PetscCall(VecSet(mfqP->subpdel, +1.0)); 79 80 /* Complete the lower triangle of the Hessian matrix */ 81 for (i = 0; i < mfqP->n; i++) { 82 for (j = i + 1; j < mfqP->n; j++) mfqP->Hres[j + mfqP->n * i] = mfqP->Hres[mfqP->n * j + i]; 83 } 84 PetscCall(MatSetValues(mfqP->subH, mfqP->n, mfqP->indices, mfqP->n, mfqP->indices, mfqP->Hres, INSERT_VALUES)); 85 PetscCall(MatAssemblyBegin(mfqP->subH, MAT_FINAL_ASSEMBLY)); 86 PetscCall(MatAssemblyEnd(mfqP->subH, MAT_FINAL_ASSEMBLY)); 87 88 PetscCall(TaoResetStatistics(mfqP->subtao)); 89 /* PetscCall(TaoSetTolerances(mfqP->subtao,*gnorm,*gnorm,PETSC_DEFAULT)); */ 90 /* enforce bound constraints -- experimental */ 91 if (tao->XU && tao->XL) { 92 PetscCall(VecCopy(tao->XU, mfqP->subxu)); 93 PetscCall(VecAXPY(mfqP->subxu, -1.0, tao->solution)); 94 PetscCall(VecScale(mfqP->subxu, 1.0 / mfqP->delta)); 95 PetscCall(VecCopy(tao->XL, mfqP->subxl)); 96 PetscCall(VecAXPY(mfqP->subxl, -1.0, tao->solution)); 97 PetscCall(VecScale(mfqP->subxl, 1.0 / mfqP->delta)); 98 99 PetscCall(VecPointwiseMin(mfqP->subxu, mfqP->subxu, mfqP->subpdel)); 100 PetscCall(VecPointwiseMax(mfqP->subxl, mfqP->subxl, mfqP->subndel)); 101 } else { 102 PetscCall(VecCopy(mfqP->subpdel, mfqP->subxu)); 103 PetscCall(VecCopy(mfqP->subndel, mfqP->subxl)); 104 } 105 /* Make sure xu > xl */ 106 PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 107 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 108 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 109 PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "upper bound < lower bound in subproblem"); 110 /* Make sure xu > tao->solution > xl */ 111 PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 112 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 113 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 114 PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess < lower bound in subproblem"); 115 116 PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 117 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 118 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 119 PetscCheck(maxval <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "initial guess > upper bound in subproblem"); 120 121 PetscCall(TaoSolve(mfqP->subtao)); 122 PetscCall(TaoGetSolutionStatus(mfqP->subtao, NULL, qmin, NULL, NULL, NULL, NULL)); 123 124 /* test bounds post-solution*/ 125 PetscCall(VecCopy(mfqP->subxl, mfqP->subpdel)); 126 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subx)); 127 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 128 if (maxval > 1e-5) { 129 PetscCall(PetscInfo(tao, "subproblem solution < lower bound\n")); 130 tao->reason = TAO_DIVERGED_TR_REDUCTION; 131 } 132 133 PetscCall(VecCopy(mfqP->subx, mfqP->subpdel)); 134 PetscCall(VecAXPY(mfqP->subpdel, -1.0, mfqP->subxu)); 135 PetscCall(VecMax(mfqP->subpdel, NULL, &maxval)); 136 if (maxval > 1e-5) { 137 PetscCall(PetscInfo(tao, "subproblem solution > upper bound\n")); 138 tao->reason = TAO_DIVERGED_TR_REDUCTION; 139 } 140 } else { 141 PetscCall(gqt(mfqP->n, mfqP->Hres, mfqP->n, mfqP->Gres, 1.0, mfqP->gqt_rtol, atol, mfqP->gqt_maxits, gnorm, qmin, mfqP->Xsubproblem, &info, &its, mfqP->work, mfqP->work2, mfqP->work3)); 142 } 143 *qmin *= -1; 144 PetscFunctionReturn(PETSC_SUCCESS); 145 } 146 147 static PetscErrorCode pounders_update_res(Tao tao) 148 { 149 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 150 PetscInt i, row, col; 151 PetscBLASInt blasn = mfqP->n, blasn2 = blasn * blasn, blasm = mfqP->m, ione = 1; 152 PetscReal zero = 0.0, one = 1.0, wii, factor; 153 154 PetscFunctionBegin; 155 for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] = 0; 156 for (i = 0; i < mfqP->n * mfqP->n; i++) mfqP->Hres[i] = 0; 157 158 /* Compute Gres= sum_ij[wij * (cjgi + cigj)] */ 159 if (tao->res_weights_v) { 160 /* Vector(diagonal) weights: gres = sum_i(wii*ci*gi) */ 161 for (i = 0; i < mfqP->m; i++) { 162 PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &factor)); 163 factor = factor * mfqP->C[i]; 164 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * i], &ione, mfqP->Gres, &ione)); 165 } 166 167 /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 168 /* vector(diagonal weights) Hres = sum_i(wii*(ci*Hi + gi * gi')*/ 169 for (i = 0; i < mfqP->m; i++) { 170 PetscCall(VecGetValues(tao->res_weights_v, 1, &i, &wii)); 171 if (tao->niter > 1) { 172 factor = wii * mfqP->C[i]; 173 /* add wii * ci * Hi */ 174 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 175 } 176 /* add wii * gi * gi' */ 177 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &wii, &mfqP->Fdiff[blasn * i], &blasn, &mfqP->Fdiff[blasn * i], &blasn, &one, mfqP->Hres, &blasn)); 178 } 179 } else if (tao->res_weights_w) { 180 /* General case: .5 * Gres= sum_ij[wij * (cjgi + cigj)] */ 181 for (i = 0; i < tao->res_weights_n; i++) { 182 row = tao->res_weights_rows[i]; 183 col = tao->res_weights_cols[i]; 184 185 factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 186 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * row], &ione, mfqP->Gres, &ione)); 187 factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 188 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &factor, &mfqP->Fdiff[blasn * col], &ione, mfqP->Gres, &ione)); 189 } 190 191 /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 192 /* .5 * sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 193 for (i = 0; i < tao->res_weights_n; i++) { 194 row = tao->res_weights_rows[i]; 195 col = tao->res_weights_cols[i]; 196 factor = tao->res_weights_w[i] / 2.0; 197 /* add wij * gi gj' + wij * gj gi' */ 198 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * row], &blasn, &mfqP->Fdiff[blasn * col], &blasn, &one, mfqP->Hres, &blasn)); 199 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &ione, &factor, &mfqP->Fdiff[blasn * col], &blasn, &mfqP->Fdiff[blasn * row], &blasn, &one, mfqP->Hres, &blasn)); 200 } 201 if (tao->niter > 1) { 202 for (i = 0; i < tao->res_weights_n; i++) { 203 row = tao->res_weights_rows[i]; 204 col = tao->res_weights_cols[i]; 205 206 /* add wij*cj*Hi */ 207 factor = tao->res_weights_w[i] * mfqP->C[col] / 2.0; 208 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[row], &blasm, mfqP->Hres, &ione)); 209 210 /* add wij*ci*Hj */ 211 factor = tao->res_weights_w[i] * mfqP->C[row] / 2.0; 212 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[col], &blasm, mfqP->Hres, &ione)); 213 } 214 } 215 } else { 216 /* Default: Gres= sum_i[cigi] = G*c' */ 217 PetscCall(PetscInfo(tao, "Identity weights\n")); 218 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->C, &ione, &zero, mfqP->Gres, &ione)); 219 220 /* compute Hres = sum_ij [wij * (*ci*Hj + cj*Hi + gi gj' + gj gi') ] */ 221 /* Hres = G*G' + 0.5 sum {F(xkin,i)*H(:,:,i)} */ 222 PetscCallBLAS("BLASgemm", BLASgemm_("N", "T", &blasn, &blasn, &blasm, &one, mfqP->Fdiff, &blasn, mfqP->Fdiff, &blasn, &zero, mfqP->Hres, &blasn)); 223 224 /* sum(F(xkin,i)*H(:,:,i)) */ 225 if (tao->niter > 1) { 226 for (i = 0; i < mfqP->m; i++) { 227 factor = mfqP->C[i]; 228 PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn2, &factor, &mfqP->H[i], &blasm, mfqP->Hres, &ione)); 229 } 230 } 231 } 232 PetscFunctionReturn(PETSC_SUCCESS); 233 } 234 235 static PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi) 236 { 237 /* Phi = .5*[x(1)^2 sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */ 238 PetscInt i, j, k; 239 PetscReal sqrt2 = PetscSqrtReal(2.0); 240 241 PetscFunctionBegin; 242 j = 0; 243 for (i = 0; i < n; i++) { 244 phi[j] = 0.5 * x[i] * x[i]; 245 j++; 246 for (k = i + 1; k < n; k++) { 247 phi[j] = x[i] * x[k] / sqrt2; 248 j++; 249 } 250 } 251 PetscFunctionReturn(PETSC_SUCCESS); 252 } 253 254 static PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP) 255 { 256 /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x' 257 that satisfies the interpolation conditions Q(X[:,j]) = f(j) 258 for j=1,...,m and with a Hessian matrix of least Frobenius norm */ 259 260 /* NB --we are ignoring c */ 261 PetscInt i, j, k, num, np = mfqP->nmodelpoints; 262 PetscReal one = 1.0, zero = 0.0, negone = -1.0; 263 PetscBLASInt blasnpmax = mfqP->npmax; 264 PetscBLASInt blasnplus1 = mfqP->n + 1; 265 PetscBLASInt blasnp = np; 266 PetscBLASInt blasint = mfqP->n * (mfqP->n + 1) / 2; 267 PetscBLASInt blasint2 = np - mfqP->n - 1; 268 PetscBLASInt info, ione = 1; 269 PetscReal sqrt2 = PetscSqrtReal(2.0); 270 271 PetscFunctionBegin; 272 for (i = 0; i < mfqP->n * mfqP->m; i++) mfqP->Gdel[i] = 0; 273 for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; i++) mfqP->Hdel[i] = 0; 274 275 /* factor M */ 276 PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&blasnplus1, &blasnp, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &info)); 277 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrf returned with value %" PetscBLASInt_FMT, info); 278 279 if (np == mfqP->n + 1) { 280 for (i = 0; i < mfqP->npmax - mfqP->n - 1; i++) mfqP->omega[i] = 0.0; 281 for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->beta[i] = 0.0; 282 } else { 283 /* Let Ltmp = (L'*L) */ 284 PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &blasint2, &blasint2, &blasint, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, &zero, mfqP->L_tmp, &blasint)); 285 286 /* factor Ltmp */ 287 PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &blasint2, mfqP->L_tmp, &blasint, &info)); 288 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrf returned with value %" PetscBLASInt_FMT, info); 289 } 290 291 for (k = 0; k < mfqP->m; k++) { 292 if (np != mfqP->n + 1) { 293 /* Solve L'*L*Omega = Z' * RESk*/ 294 PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasnp, &blasint2, &one, mfqP->Z, &blasnpmax, &mfqP->RES[mfqP->npmax * k], &ione, &zero, mfqP->omega, &ione)); 295 PetscCallBLAS("LAPACKpotrs", LAPACKpotrs_("L", &blasint2, &ione, mfqP->L_tmp, &blasint, mfqP->omega, &blasint2, &info)); 296 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine potrs returned with value %" PetscBLASInt_FMT, info); 297 298 /* Beta = L*Omega */ 299 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasint, &blasint2, &one, &mfqP->L[(mfqP->n + 1) * blasint], &blasint, mfqP->omega, &ione, &zero, mfqP->beta, &ione)); 300 } 301 302 /* solve M'*Alpha = RESk - N'*Beta */ 303 PetscCallBLAS("BLASgemv", BLASgemv_("T", &blasint, &blasnp, &negone, mfqP->N, &blasint, mfqP->beta, &ione, &one, &mfqP->RES[mfqP->npmax * k], &ione)); 304 PetscCallBLAS("LAPACKgetrs", LAPACKgetrs_("T", &blasnplus1, &ione, mfqP->M, &blasnplus1, mfqP->npmaxiwork, &mfqP->RES[mfqP->npmax * k], &blasnplus1, &info)); 305 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine getrs returned with value %" PetscBLASInt_FMT, info); 306 307 /* Gdel(:,k) = Alpha(2:n+1) */ 308 for (i = 0; i < mfqP->n; i++) mfqP->Gdel[i + mfqP->n * k] = mfqP->RES[mfqP->npmax * k + i + 1]; 309 310 /* Set Hdels */ 311 num = 0; 312 for (i = 0; i < mfqP->n; i++) { 313 /* H[i,i,k] = Beta(num) */ 314 mfqP->Hdel[(i * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num]; 315 num++; 316 for (j = i + 1; j < mfqP->n; j++) { 317 /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */ 318 mfqP->Hdel[(j * mfqP->n + i) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 319 mfqP->Hdel[(i * mfqP->n + j) * mfqP->m + k] = mfqP->beta[num] / sqrt2; 320 num++; 321 } 322 } 323 } 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode morepoints(TAO_POUNDERS *mfqP) 328 { 329 /* Assumes mfqP->model_indices[0] is minimum index 330 Finishes adding points to mfqP->model_indices (up to npmax) 331 Computes L,Z,M,N 332 np is actual number of points in model (should equal npmax?) */ 333 PetscInt point, i, j, offset; 334 PetscInt reject; 335 PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blasnplus1 = mfqP->n + 1, info, blasnmax = mfqP->nmax, blasint, blasint2, blasnp, blasmaxmn; 336 const PetscReal *x; 337 PetscReal normd; 338 339 PetscFunctionBegin; 340 /* Initialize M,N */ 341 for (i = 0; i < mfqP->n + 1; i++) { 342 PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 343 mfqP->M[(mfqP->n + 1) * i] = 1.0; 344 for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * i)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 345 PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->model_indices[i]], &x)); 346 PetscCall(phi2eval(&mfqP->M[1 + ((mfqP->n + 1) * i)], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * i])); 347 } 348 349 /* Now we add points until we have npmax starting with the most recent ones */ 350 point = mfqP->nHist - 1; 351 mfqP->nmodelpoints = mfqP->n + 1; 352 while (mfqP->nmodelpoints < mfqP->npmax && point >= 0) { 353 /* Reject any points already in the model */ 354 reject = 0; 355 for (j = 0; j < mfqP->n + 1; j++) { 356 if (point == mfqP->model_indices[j]) { 357 reject = 1; 358 break; 359 } 360 } 361 362 /* Reject if norm(d) >c2 */ 363 if (!reject) { 364 PetscCall(VecCopy(mfqP->Xhist[point], mfqP->workxvec)); 365 PetscCall(VecAXPY(mfqP->workxvec, -1.0, mfqP->Xhist[mfqP->minindex])); 366 PetscCall(VecNorm(mfqP->workxvec, NORM_2, &normd)); 367 normd /= mfqP->delta; 368 if (normd > mfqP->c2) reject = 1; 369 } 370 if (reject) { 371 point--; 372 continue; 373 } 374 375 PetscCall(VecGetArrayRead(mfqP->Xhist[point], &x)); 376 mfqP->M[(mfqP->n + 1) * mfqP->nmodelpoints] = 1.0; 377 for (j = 0; j < mfqP->n; j++) mfqP->M[j + 1 + ((mfqP->n + 1) * mfqP->nmodelpoints)] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 378 PetscCall(VecRestoreArrayRead(mfqP->Xhist[point], &x)); 379 PetscCall(phi2eval(&mfqP->M[1 + (mfqP->n + 1) * mfqP->nmodelpoints], mfqP->n, &mfqP->N[mfqP->n * (mfqP->n + 1) / 2 * (mfqP->nmodelpoints)])); 380 381 /* Update QR factorization */ 382 /* Copy M' to Q_tmp */ 383 for (i = 0; i < mfqP->n + 1; i++) { 384 for (j = 0; j < mfqP->npmax; j++) mfqP->Q_tmp[j + mfqP->npmax * i] = mfqP->M[i + (mfqP->n + 1) * j]; 385 } 386 blasnp = mfqP->nmodelpoints + 1; 387 /* Q_tmp,R = qr(M') */ 388 blasmaxmn = PetscMax(mfqP->m, mfqP->n + 1); 389 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->mwork, &blasmaxmn, &info)); 390 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine geqrf returned with value %" PetscBLASInt_FMT, info); 391 392 /* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */ 393 /* L = N*Qtmp */ 394 blasint2 = mfqP->n * (mfqP->n + 1) / 2; 395 /* Copy N to L_tmp */ 396 for (i = 0; i < mfqP->n * (mfqP->n + 1) / 2 * mfqP->npmax; i++) mfqP->L_tmp[i] = mfqP->N[i]; 397 /* Copy L_save to L_tmp */ 398 399 /* L_tmp = N*Qtmp' */ 400 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasint2, &blasnp, &blasnplus1, mfqP->Q_tmp, &blasnpmax, mfqP->tau_tmp, mfqP->L_tmp, &blasint2, mfqP->npmaxwork, &blasnmax, &info)); 401 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 402 403 /* Copy L_tmp to L_save */ 404 for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L_save[i] = mfqP->L_tmp[i]; 405 406 /* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */ 407 blasint = mfqP->nmodelpoints - mfqP->n; 408 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); 409 PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("N", "N", &blasint2, &blasint, &mfqP->L_tmp[(mfqP->n + 1) * blasint2], &blasint2, mfqP->beta, mfqP->work, &blasn, mfqP->work, &blasn, mfqP->npmaxwork, &blasnmax, &info)); 410 PetscCall(PetscFPTrapPop()); 411 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine gesvd returned with value %" PetscBLASInt_FMT, info); 412 413 if (mfqP->beta[PetscMin(blasint, blasint2) - 1] > mfqP->theta2) { 414 /* accept point */ 415 mfqP->model_indices[mfqP->nmodelpoints] = point; 416 /* Copy Q_tmp to Q */ 417 for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q[i] = mfqP->Q_tmp[i]; 418 for (i = 0; i < mfqP->npmax; i++) mfqP->tau[i] = mfqP->tau_tmp[i]; 419 mfqP->nmodelpoints++; 420 blasnp = mfqP->nmodelpoints; 421 422 /* Copy L_save to L */ 423 for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = mfqP->L_save[i]; 424 } 425 point--; 426 } 427 428 blasnp = mfqP->nmodelpoints; 429 /* Copy Q(:,n+2:np) to Z */ 430 /* First set Q_tmp to I */ 431 for (i = 0; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Q_tmp[i] = 0.0; 432 for (i = 0; i < mfqP->npmax; i++) mfqP->Q_tmp[i + mfqP->npmax * i] = 1.0; 433 434 /* Q_tmp = I * Q */ 435 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasnp, &blasnp, &blasnplus1, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 436 PetscCheck(info == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACK routine ormqr returned with value %" PetscBLASInt_FMT, info); 437 438 /* Copy Q_tmp(:,n+2:np) to Z) */ 439 offset = mfqP->npmax * (mfqP->n + 1); 440 for (i = offset; i < mfqP->npmax * mfqP->npmax; i++) mfqP->Z[i - offset] = mfqP->Q_tmp[i]; 441 442 if (mfqP->nmodelpoints == mfqP->n + 1) { 443 /* Set L to I_{n+1} */ 444 for (i = 0; i < mfqP->npmax * mfqP->n * (mfqP->n + 1) / 2; i++) mfqP->L[i] = 0.0; 445 for (i = 0; i < mfqP->n; i++) mfqP->L[(mfqP->n * (mfqP->n + 1) / 2) * i + i] = 1.0; 446 } 447 PetscFunctionReturn(PETSC_SUCCESS); 448 } 449 450 /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */ 451 static PetscErrorCode addpoint(Tao tao, TAO_POUNDERS *mfqP, PetscInt index) 452 { 453 PetscFunctionBegin; 454 /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/ 455 PetscCall(VecDuplicate(mfqP->Xhist[0], &mfqP->Xhist[mfqP->nHist])); 456 PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, &mfqP->Q_tmp[index * mfqP->npmax], INSERT_VALUES)); 457 PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 458 PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 459 PetscCall(VecAYPX(mfqP->Xhist[mfqP->nHist], mfqP->delta, mfqP->Xhist[mfqP->minindex])); 460 461 /* Project into feasible region */ 462 if (tao->XU && tao->XL) PetscCall(VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU, mfqP->Xhist[mfqP->nHist])); 463 464 /* Compute value of new vector */ 465 PetscCall(VecDuplicate(mfqP->Fhist[0], &mfqP->Fhist[mfqP->nHist])); 466 CHKMEMQ; 467 PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 468 469 /* Add new vector to model */ 470 mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist; 471 mfqP->nmodelpoints++; 472 mfqP->nHist++; 473 PetscFunctionReturn(PETSC_SUCCESS); 474 } 475 476 static PetscErrorCode modelimprove(Tao tao, TAO_POUNDERS *mfqP, PetscInt addallpoints) 477 { 478 /* modeld = Q(:,np+1:n)' */ 479 PetscInt i, j, minindex = 0; 480 PetscReal dp, half = 0.5, one = 1.0, minvalue = PETSC_INFINITY; 481 PetscBLASInt blasn = mfqP->n, blasnpmax = mfqP->npmax, blask, info; 482 PetscBLASInt blas1 = 1, blasnmax = mfqP->nmax; 483 484 PetscFunctionBegin; 485 blask = mfqP->nmodelpoints; 486 /* Qtmp = I(n x n) */ 487 for (i = 0; i < mfqP->n; i++) { 488 for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i + mfqP->npmax * j] = 0.0; 489 } 490 for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[j + mfqP->npmax * j] = 1.0; 491 492 /* Qtmp = Q * I */ 493 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &blasn, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork, &blasnmax, &info)); 494 495 for (i = mfqP->nmodelpoints; i < mfqP->n; i++) { 496 PetscCallBLAS("BLASdot", dp = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->Gres, &blas1)); 497 if (dp > 0.0) { /* Model says use the other direction! */ 498 for (j = 0; j < mfqP->n; j++) mfqP->Q_tmp[i * mfqP->npmax + j] *= -1; 499 } 500 /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */ 501 for (j = 0; j < mfqP->n; j++) mfqP->work2[j] = mfqP->Gres[j]; 502 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &half, mfqP->Hres, &blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, &one, mfqP->work2, &blas1)); 503 PetscCallBLAS("BLASdot", mfqP->work[i] = BLASdot_(&blasn, &mfqP->Q_tmp[i * mfqP->npmax], &blas1, mfqP->work2, &blas1)); 504 if (i == mfqP->nmodelpoints || mfqP->work[i] < minvalue) { 505 minindex = i; 506 minvalue = mfqP->work[i]; 507 } 508 if (addallpoints != 0) PetscCall(addpoint(tao, mfqP, i)); 509 } 510 if (!addallpoints) PetscCall(addpoint(tao, mfqP, minindex)); 511 PetscFunctionReturn(PETSC_SUCCESS); 512 } 513 514 static PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin, PetscReal c) 515 { 516 PetscInt i, j; 517 PetscBLASInt blasm = mfqP->m, blasj, blask, blasn = mfqP->n, ione = 1, info; 518 PetscBLASInt blasnpmax = mfqP->npmax, blasmaxmn; 519 PetscReal proj, normd; 520 const PetscReal *x; 521 522 PetscFunctionBegin; 523 for (i = mfqP->nHist - 1; i >= 0; i--) { 524 PetscCall(VecGetArrayRead(mfqP->Xhist[i], &x)); 525 for (j = 0; j < mfqP->n; j++) mfqP->work[j] = (x[j] - xmin[j]) / mfqP->delta; 526 PetscCall(VecRestoreArrayRead(mfqP->Xhist[i], &x)); 527 PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, mfqP->work2, &ione)); 528 PetscCallBLAS("BLASnrm2", normd = BLASnrm2_(&blasn, mfqP->work, &ione)); 529 if (normd <= c) { 530 blasj = PetscMax((mfqP->n - mfqP->nmodelpoints), 0); 531 if (!mfqP->q_is_I) { 532 /* project D onto null */ 533 blask = (mfqP->nmodelpoints); 534 PetscCallBLAS("LAPACKormqr", LAPACKormqr_("R", "N", &ione, &blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->work2, &ione, mfqP->mwork, &blasm, &info)); 535 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "ormqr returned value %" PetscBLASInt_FMT, info); 536 } 537 PetscCallBLAS("BLASnrm2", proj = BLASnrm2_(&blasj, &mfqP->work2[mfqP->nmodelpoints], &ione)); 538 539 if (proj >= mfqP->theta1) { /* add this index to model */ 540 mfqP->model_indices[mfqP->nmodelpoints] = i; 541 mfqP->nmodelpoints++; 542 PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, mfqP->work, &ione, &mfqP->Q_tmp[mfqP->npmax * (mfqP->nmodelpoints - 1)], &ione)); 543 blask = mfqP->npmax * (mfqP->nmodelpoints); 544 PetscCallBLAS("BLAScopy", BLAScopy_(&blask, mfqP->Q_tmp, &ione, mfqP->Q, &ione)); 545 blask = mfqP->nmodelpoints; 546 blasmaxmn = PetscMax(mfqP->m, mfqP->n); 547 PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&blasn, &blask, mfqP->Q, &blasnpmax, mfqP->tau, mfqP->mwork, &blasmaxmn, &info)); 548 PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "geqrf returned value %" PetscBLASInt_FMT, info); 549 mfqP->q_is_I = 0; 550 } 551 if (mfqP->nmodelpoints == mfqP->n) break; 552 } 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 static PetscErrorCode TaoSolve_POUNDERS(Tao tao) 558 { 559 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 560 PetscInt i, ii, j, k, l; 561 PetscReal step = 1.0; 562 PetscInt low, high; 563 PetscReal minnorm; 564 PetscReal *x, *f; 565 const PetscReal *xmint, *fmin; 566 PetscReal deltaold; 567 PetscReal gnorm; 568 PetscBLASInt info, ione = 1, iblas; 569 PetscBool valid, same; 570 PetscReal mdec, rho, normxsp; 571 PetscReal one = 1.0, zero = 0.0, ratio; 572 PetscBLASInt blasm, blasn, blasncopy, blasnpmax; 573 static PetscBool set = PETSC_FALSE; 574 575 /* n = # of parameters 576 m = dimension (components) of function */ 577 PetscFunctionBegin; 578 PetscCall(PetscCitationsRegister("@article{UNEDF0,\n" 579 "title = {Nuclear energy density optimization},\n" 580 "author = {Kortelainen, M. and Lesinski, T. and Mor\'e, J. and Nazarewicz, W.\n" 581 " and Sarich, J. and Schunck, N. and Stoitsov, M. V. and Wild, S. },\n" 582 "journal = {Phys. Rev. C},\n" 583 "volume = {82},\n" 584 "number = {2},\n" 585 "pages = {024313},\n" 586 "numpages = {18},\n" 587 "year = {2010},\n" 588 "month = {Aug},\n" 589 "doi = {10.1103/PhysRevC.82.024313}\n}\n", 590 &set)); 591 tao->niter = 0; 592 if (tao->XL && tao->XU) { 593 /* Check x0 <= XU */ 594 PetscReal val; 595 596 PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 597 PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 598 PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 599 PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 > upper bound"); 600 601 /* Check x0 >= xl */ 602 PetscCall(VecCopy(tao->XL, mfqP->Xhist[0])); 603 PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->solution)); 604 PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 605 PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 < lower bound"); 606 607 /* Check x0 + delta < XU -- should be able to get around this eventually */ 608 609 PetscCall(VecSet(mfqP->Xhist[0], mfqP->delta)); 610 PetscCall(VecAXPY(mfqP->Xhist[0], 1.0, tao->solution)); 611 PetscCall(VecAXPY(mfqP->Xhist[0], -1.0, tao->XU)); 612 PetscCall(VecMax(mfqP->Xhist[0], NULL, &val)); 613 PetscCheck(val <= 1e-10, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_OUTOFRANGE, "X0 + delta > upper bound"); 614 } 615 616 blasm = mfqP->m; 617 blasn = mfqP->n; 618 blasnpmax = mfqP->npmax; 619 for (i = 0; i < mfqP->n * mfqP->n * mfqP->m; ++i) mfqP->H[i] = 0; 620 621 PetscCall(VecCopy(tao->solution, mfqP->Xhist[0])); 622 623 /* This provides enough information to approximate the gradient of the objective */ 624 /* using a forward difference scheme. */ 625 626 PetscCall(PetscInfo(tao, "Initialize simplex; delta = %10.9e\n", (double)mfqP->delta)); 627 PetscCall(pounders_feval(tao, mfqP->Xhist[0], mfqP->Fhist[0], &mfqP->Fres[0])); 628 mfqP->minindex = 0; 629 minnorm = mfqP->Fres[0]; 630 631 PetscCall(VecGetOwnershipRange(mfqP->Xhist[0], &low, &high)); 632 for (i = 1; i < mfqP->n + 1; ++i) { 633 PetscCall(VecCopy(mfqP->Xhist[0], mfqP->Xhist[i])); 634 635 if (i - 1 >= low && i - 1 < high) { 636 PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 637 x[i - 1 - low] += mfqP->delta; 638 PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 639 } 640 CHKMEMQ; 641 PetscCall(pounders_feval(tao, mfqP->Xhist[i], mfqP->Fhist[i], &mfqP->Fres[i])); 642 if (mfqP->Fres[i] < minnorm) { 643 mfqP->minindex = i; 644 minnorm = mfqP->Fres[i]; 645 } 646 } 647 PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 648 PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 649 PetscCall(PetscInfo(tao, "Finalize simplex; minnorm = %10.9e\n", (double)minnorm)); 650 651 /* Gather mpi vecs to one big local vec */ 652 653 /* Begin serial code */ 654 655 /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 656 /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */ 657 /* (Column oriented for blas calls) */ 658 ii = 0; 659 660 PetscCall(PetscInfo(tao, "Build matrix: %" PetscInt_FMT "\n", (PetscInt)mfqP->size)); 661 if (1 == mfqP->size) { 662 PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 663 for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 664 PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 665 PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 666 for (i = 0; i < mfqP->n + 1; i++) { 667 if (i == mfqP->minindex) continue; 668 669 PetscCall(VecGetArray(mfqP->Xhist[i], &x)); 670 for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 671 PetscCall(VecRestoreArray(mfqP->Xhist[i], &x)); 672 673 PetscCall(VecGetArray(mfqP->Fhist[i], &f)); 674 for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; 675 PetscCall(VecRestoreArray(mfqP->Fhist[i], &f)); 676 677 mfqP->model_indices[ii++] = i; 678 } 679 for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j]; 680 PetscCall(VecRestoreArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 681 } else { 682 PetscCall(VecSet(mfqP->localxmin, 0)); 683 PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 684 PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[mfqP->minindex], mfqP->localxmin, INSERT_VALUES, SCATTER_FORWARD)); 685 686 PetscCall(VecGetArrayRead(mfqP->localxmin, &xmint)); 687 for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 688 PetscCall(VecRestoreArrayRead(mfqP->localxmin, &xmint)); 689 690 PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 691 PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[mfqP->minindex], mfqP->localfmin, INSERT_VALUES, SCATTER_FORWARD)); 692 PetscCall(VecGetArrayRead(mfqP->localfmin, &fmin)); 693 for (i = 0; i < mfqP->n + 1; i++) { 694 if (i == mfqP->minindex) continue; 695 696 PetscCall(VecScatterBegin(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 697 PetscCall(VecScatterEnd(mfqP->scatterx, mfqP->Xhist[ii], mfqP->localx, INSERT_VALUES, SCATTER_FORWARD)); 698 PetscCall(VecGetArray(mfqP->localx, &x)); 699 for (j = 0; j < mfqP->n; j++) mfqP->Disp[ii + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / mfqP->delta; 700 PetscCall(VecRestoreArray(mfqP->localx, &x)); 701 702 PetscCall(VecScatterBegin(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 703 PetscCall(VecScatterEnd(mfqP->scatterf, mfqP->Fhist[ii], mfqP->localf, INSERT_VALUES, SCATTER_FORWARD)); 704 PetscCall(VecGetArray(mfqP->localf, &f)); 705 for (j = 0; j < mfqP->m; j++) mfqP->Fdiff[ii + mfqP->n * j] = f[j] - fmin[j]; 706 PetscCall(VecRestoreArray(mfqP->localf, &f)); 707 708 mfqP->model_indices[ii++] = i; 709 } 710 for (j = 0; j < mfqP->m; j++) mfqP->C[j] = fmin[j]; 711 PetscCall(VecRestoreArrayRead(mfqP->localfmin, &fmin)); 712 } 713 714 /* Determine the initial quadratic models */ 715 /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */ 716 /* D (nxn) Fdiff (nxm) => G (nxm) */ 717 blasncopy = blasn; 718 PetscCallBLAS("LAPACKgesv", LAPACKgesv_(&blasn, &blasm, mfqP->Disp, &blasnpmax, mfqP->iwork, mfqP->Fdiff, &blasncopy, &info)); 719 PetscCall(PetscInfo(tao, "Linear solve return: %" PetscInt_FMT "\n", (PetscInt)info)); 720 721 PetscCall(pounders_update_res(tao)); 722 723 valid = PETSC_TRUE; 724 725 PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 726 PetscCall(VecAssemblyBegin(tao->gradient)); 727 PetscCall(VecAssemblyEnd(tao->gradient)); 728 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 729 gnorm *= mfqP->delta; 730 PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 731 732 tao->reason = TAO_CONTINUE_ITERATING; 733 PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 734 PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 735 PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 736 737 mfqP->nHist = mfqP->n + 1; 738 mfqP->nmodelpoints = mfqP->n + 1; 739 PetscCall(PetscInfo(tao, "Initial gradient: %20.19e\n", (double)gnorm)); 740 741 while (tao->reason == TAO_CONTINUE_ITERATING) { 742 PetscReal gnm = 1e-4; 743 /* Call general purpose update function */ 744 PetscTryTypeMethod(tao, update, tao->niter, tao->user_update); 745 tao->niter++; 746 /* Solve the subproblem min{Q(s): ||s|| <= 1.0} */ 747 PetscCall(gqtwrap(tao, &gnm, &mdec)); 748 /* Evaluate the function at the new point */ 749 750 for (i = 0; i < mfqP->n; i++) mfqP->work[i] = mfqP->Xsubproblem[i] * mfqP->delta + mfqP->xmin[i]; 751 PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[mfqP->nHist])); 752 PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[mfqP->nHist])); 753 PetscCall(VecSetValues(mfqP->Xhist[mfqP->nHist], mfqP->n, mfqP->indices, mfqP->work, INSERT_VALUES)); 754 PetscCall(VecAssemblyBegin(mfqP->Xhist[mfqP->nHist])); 755 PetscCall(VecAssemblyEnd(mfqP->Xhist[mfqP->nHist])); 756 757 PetscCall(pounders_feval(tao, mfqP->Xhist[mfqP->nHist], mfqP->Fhist[mfqP->nHist], &mfqP->Fres[mfqP->nHist])); 758 759 rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec; 760 mfqP->nHist++; 761 762 /* Update the center */ 763 if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid == PETSC_TRUE)) { 764 /* Update model to reflect new base point */ 765 for (i = 0; i < mfqP->n; i++) mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i]) / mfqP->delta; 766 for (j = 0; j < mfqP->m; j++) { 767 /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work'; 768 G(:,j) = G(:,j) + H(:,:,j)*work' */ 769 for (k = 0; k < mfqP->n; k++) { 770 mfqP->work2[k] = 0.0; 771 for (l = 0; l < mfqP->n; l++) mfqP->work2[k] += mfqP->H[j + mfqP->m * (k + l * mfqP->n)] * mfqP->work[l]; 772 } 773 for (i = 0; i < mfqP->n; i++) { 774 mfqP->C[j] += mfqP->work[i] * (mfqP->Fdiff[i + mfqP->n * j] + 0.5 * mfqP->work2[i]); 775 mfqP->Fdiff[i + mfqP->n * j] += mfqP->work2[i]; 776 } 777 } 778 /* Cres += work*Gres + .5*work*Hres*work'; 779 Gres += Hres*work'; */ 780 781 PetscCallBLAS("BLASgemv", BLASgemv_("N", &blasn, &blasn, &one, mfqP->Hres, &blasn, mfqP->work, &ione, &zero, mfqP->work2, &ione)); 782 for (i = 0; i < mfqP->n; i++) mfqP->Gres[i] += mfqP->work2[i]; 783 mfqP->minindex = mfqP->nHist - 1; 784 minnorm = mfqP->Fres[mfqP->minindex]; 785 PetscCall(VecCopy(mfqP->Fhist[mfqP->minindex], tao->ls_res)); 786 /* Change current center */ 787 PetscCall(VecGetArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 788 for (i = 0; i < mfqP->n; i++) mfqP->xmin[i] = xmint[i]; 789 PetscCall(VecRestoreArrayRead(mfqP->Xhist[mfqP->minindex], &xmint)); 790 } 791 792 /* Evaluate at a model-improving point if necessary */ 793 if (valid == PETSC_FALSE) { 794 mfqP->q_is_I = 1; 795 mfqP->nmodelpoints = 0; 796 PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 797 if (mfqP->nmodelpoints < mfqP->n) { 798 PetscCall(PetscInfo(tao, "Model not valid -- model-improving\n")); 799 PetscCall(modelimprove(tao, mfqP, 1)); 800 } 801 } 802 803 /* Update the trust region radius */ 804 deltaold = mfqP->delta; 805 normxsp = 0; 806 for (i = 0; i < mfqP->n; i++) normxsp += mfqP->Xsubproblem[i] * mfqP->Xsubproblem[i]; 807 normxsp = PetscSqrtReal(normxsp); 808 if (rho >= mfqP->eta1 && normxsp > 0.5 * mfqP->delta) { 809 mfqP->delta = PetscMin(mfqP->delta * mfqP->gamma1, mfqP->deltamax); 810 } else if (valid == PETSC_TRUE) { 811 mfqP->delta = PetscMax(mfqP->delta * mfqP->gamma0, mfqP->deltamin); 812 } 813 814 /* Compute the next interpolation set */ 815 mfqP->q_is_I = 1; 816 mfqP->nmodelpoints = 0; 817 PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c1 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c1)); 818 PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c1)); 819 if (mfqP->nmodelpoints == mfqP->n) { 820 valid = PETSC_TRUE; 821 } else { 822 valid = PETSC_FALSE; 823 PetscCall(PetscInfo(tao, "Affine Points: xmin = %20.19e, c2 = %20.19e\n", (double)*mfqP->xmin, (double)mfqP->c2)); 824 PetscCall(affpoints(mfqP, mfqP->xmin, mfqP->c2)); 825 if (mfqP->n > mfqP->nmodelpoints) { 826 PetscCall(PetscInfo(tao, "Model not valid -- adding geometry points\n")); 827 PetscCall(modelimprove(tao, mfqP, mfqP->n - mfqP->nmodelpoints)); 828 } 829 } 830 for (i = mfqP->nmodelpoints; i > 0; i--) mfqP->model_indices[i] = mfqP->model_indices[i - 1]; 831 mfqP->nmodelpoints++; 832 mfqP->model_indices[0] = mfqP->minindex; 833 PetscCall(morepoints(mfqP)); 834 for (i = 0; i < mfqP->nmodelpoints; i++) { 835 PetscCall(VecGetArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 836 for (j = 0; j < mfqP->n; j++) mfqP->Disp[i + mfqP->npmax * j] = (x[j] - mfqP->xmin[j]) / deltaold; 837 PetscCall(VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]], &x)); 838 PetscCall(VecGetArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 839 for (j = 0; j < mfqP->m; j++) { 840 for (k = 0; k < mfqP->n; k++) { 841 mfqP->work[k] = 0.0; 842 for (l = 0; l < mfqP->n; l++) mfqP->work[k] += mfqP->H[j + mfqP->m * (k + mfqP->n * l)] * mfqP->Disp[i + mfqP->npmax * l]; 843 } 844 PetscCallBLAS("BLASdot", mfqP->RES[j * mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn, &mfqP->Fdiff[j * mfqP->n], &ione, &mfqP->Disp[i], &blasnpmax) - 0.5 * BLASdot_(&blasn, mfqP->work, &ione, &mfqP->Disp[i], &blasnpmax) + f[j]); 845 } 846 PetscCall(VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]], &f)); 847 } 848 849 /* Update the quadratic model */ 850 PetscCall(PetscInfo(tao, "Get Quad, size: %" PetscInt_FMT ", points: %" PetscInt_FMT "\n", mfqP->n, mfqP->nmodelpoints)); 851 PetscCall(getquadpounders(mfqP)); 852 PetscCall(VecGetArrayRead(mfqP->Fhist[mfqP->minindex], &fmin)); 853 PetscCallBLAS("BLAScopy", BLAScopy_(&blasm, fmin, &ione, mfqP->C, &ione)); 854 /* G = G*(delta/deltaold) + Gdel */ 855 ratio = mfqP->delta / deltaold; 856 iblas = blasm * blasn; 857 PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->Fdiff, &ione)); 858 PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Gdel, &ione, mfqP->Fdiff, &ione)); 859 /* H = H*(delta/deltaold)^2 + Hdel */ 860 iblas = blasm * blasn * blasn; 861 ratio *= ratio; 862 PetscCallBLAS("BLASscal", BLASscal_(&iblas, &ratio, mfqP->H, &ione)); 863 PetscCallBLAS("BLASaxpy", BLASaxpy_(&iblas, &one, mfqP->Hdel, &ione, mfqP->H, &ione)); 864 865 /* Get residuals */ 866 PetscCall(pounders_update_res(tao)); 867 868 /* Export solution and gradient residual to TAO */ 869 PetscCall(VecCopy(mfqP->Xhist[mfqP->minindex], tao->solution)); 870 PetscCall(VecSetValues(tao->gradient, mfqP->n, mfqP->indices, mfqP->Gres, INSERT_VALUES)); 871 PetscCall(VecAssemblyBegin(tao->gradient)); 872 PetscCall(VecAssemblyEnd(tao->gradient)); 873 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 874 gnorm *= mfqP->delta; 875 /* final criticality test */ 876 PetscCall(TaoLogConvergenceHistory(tao, minnorm, gnorm, 0.0, tao->ksp_its)); 877 PetscCall(TaoMonitor(tao, tao->niter, minnorm, gnorm, 0.0, step)); 878 PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 879 /* test for repeated model */ 880 if (mfqP->nmodelpoints == mfqP->last_nmodelpoints) { 881 same = PETSC_TRUE; 882 } else { 883 same = PETSC_FALSE; 884 } 885 for (i = 0; i < mfqP->nmodelpoints; i++) { 886 if (same) { 887 if (mfqP->model_indices[i] == mfqP->last_model_indices[i]) { 888 same = PETSC_TRUE; 889 } else { 890 same = PETSC_FALSE; 891 } 892 } 893 mfqP->last_model_indices[i] = mfqP->model_indices[i]; 894 } 895 mfqP->last_nmodelpoints = mfqP->nmodelpoints; 896 if (same && mfqP->delta == deltaold) { 897 PetscCall(PetscInfo(tao, "Identical model used in successive iterations\n")); 898 tao->reason = TAO_CONVERGED_STEPTOL; 899 } 900 } 901 PetscFunctionReturn(PETSC_SUCCESS); 902 } 903 904 static PetscErrorCode TaoSetUp_POUNDERS(Tao tao) 905 { 906 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 907 PetscInt i, j; 908 IS isfloc, isfglob, isxloc, isxglob; 909 910 PetscFunctionBegin; 911 if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient)); 912 if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection)); 913 PetscCall(VecGetSize(tao->solution, &mfqP->n)); 914 PetscCall(VecGetSize(tao->ls_res, &mfqP->m)); 915 mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n); 916 if (mfqP->npmax == PETSC_DEFAULT) mfqP->npmax = 2 * mfqP->n + 1; 917 mfqP->npmax = PetscMin((mfqP->n + 1) * (mfqP->n + 2) / 2, mfqP->npmax); 918 mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n + 2); 919 920 PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Xhist)); 921 PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fhist)); 922 for (i = 0; i < mfqP->n + 1; i++) { 923 PetscCall(VecDuplicate(tao->solution, &mfqP->Xhist[i])); 924 PetscCall(VecDuplicate(tao->ls_res, &mfqP->Fhist[i])); 925 } 926 PetscCall(VecDuplicate(tao->solution, &mfqP->workxvec)); 927 PetscCall(VecDuplicate(tao->ls_res, &mfqP->workfvec)); 928 mfqP->nHist = 0; 929 930 PetscCall(PetscMalloc1(tao->max_funcs + 100, &mfqP->Fres)); 931 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->m, &mfqP->RES)); 932 PetscCall(PetscMalloc1(mfqP->n, &mfqP->work)); 933 PetscCall(PetscMalloc1(mfqP->n, &mfqP->work2)); 934 PetscCall(PetscMalloc1(mfqP->n, &mfqP->work3)); 935 PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n + 1), &mfqP->mwork)); 936 PetscCall(PetscMalloc1(mfqP->npmax - mfqP->n - 1, &mfqP->omega)); 937 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2, &mfqP->beta)); 938 PetscCall(PetscMalloc1(mfqP->n + 1, &mfqP->alpha)); 939 940 PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->H)); 941 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q)); 942 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->npmax, &mfqP->Q_tmp)); 943 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L)); 944 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_tmp)); 945 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->L_save)); 946 PetscCall(PetscMalloc1(mfqP->n * (mfqP->n + 1) / 2 * (mfqP->npmax), &mfqP->N)); 947 PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->n + 1), &mfqP->M)); 948 PetscCall(PetscMalloc1(mfqP->npmax * (mfqP->npmax - mfqP->n - 1), &mfqP->Z)); 949 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau)); 950 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->tau_tmp)); 951 mfqP->nmax = PetscMax(5 * mfqP->npmax, mfqP->n * (mfqP->n + 1) / 2); 952 PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxwork)); 953 PetscCall(PetscMalloc1(mfqP->nmax, &mfqP->npmaxiwork)); 954 PetscCall(PetscMalloc1(mfqP->n, &mfqP->xmin)); 955 PetscCall(PetscMalloc1(mfqP->m, &mfqP->C)); 956 PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Fdiff)); 957 PetscCall(PetscMalloc1(mfqP->npmax * mfqP->n, &mfqP->Disp)); 958 PetscCall(PetscMalloc1(mfqP->n, &mfqP->Gres)); 959 PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Hres)); 960 PetscCall(PetscMalloc1(mfqP->n * mfqP->n, &mfqP->Gpoints)); 961 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->model_indices)); 962 PetscCall(PetscMalloc1(mfqP->npmax, &mfqP->last_model_indices)); 963 PetscCall(PetscMalloc1(mfqP->n, &mfqP->Xsubproblem)); 964 PetscCall(PetscMalloc1(mfqP->m * mfqP->n, &mfqP->Gdel)); 965 PetscCall(PetscMalloc1(mfqP->n * mfqP->n * mfqP->m, &mfqP->Hdel)); 966 PetscCall(PetscMalloc1(PetscMax(mfqP->m, mfqP->n), &mfqP->indices)); 967 PetscCall(PetscMalloc1(mfqP->n, &mfqP->iwork)); 968 PetscCall(PetscMalloc1(mfqP->m * mfqP->m, &mfqP->w)); 969 for (i = 0; i < mfqP->m; i++) { 970 for (j = 0; j < mfqP->m; j++) { 971 if (i == j) { 972 mfqP->w[i + mfqP->m * j] = 1.0; 973 } else { 974 mfqP->w[i + mfqP->m * j] = 0.0; 975 } 976 } 977 } 978 for (i = 0; i < PetscMax(mfqP->m, mfqP->n); i++) mfqP->indices[i] = i; 979 PetscCallMPI(MPI_Comm_size(((PetscObject)tao)->comm, &mfqP->size)); 980 if (mfqP->size > 1) { 981 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localx)); 982 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->localxmin)); 983 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localf)); 984 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->m, &mfqP->localfmin)); 985 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxloc)); 986 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->n, 0, 1, &isxglob)); 987 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfloc)); 988 PetscCall(ISCreateStride(MPI_COMM_SELF, mfqP->m, 0, 1, &isfglob)); 989 990 PetscCall(VecScatterCreate(tao->solution, isxglob, mfqP->localx, isxloc, &mfqP->scatterx)); 991 PetscCall(VecScatterCreate(tao->ls_res, isfglob, mfqP->localf, isfloc, &mfqP->scatterf)); 992 993 PetscCall(ISDestroy(&isxloc)); 994 PetscCall(ISDestroy(&isxglob)); 995 PetscCall(ISDestroy(&isfloc)); 996 PetscCall(ISDestroy(&isfglob)); 997 } 998 999 if (!mfqP->usegqt) { 1000 KSP ksp; 1001 PC pc; 1002 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Xsubproblem, &mfqP->subx)); 1003 PetscCall(VecCreateSeq(PETSC_COMM_SELF, mfqP->n, &mfqP->subxl)); 1004 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subb)); 1005 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subxu)); 1006 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subpdel)); 1007 PetscCall(VecDuplicate(mfqP->subxl, &mfqP->subndel)); 1008 PetscCall(TaoCreate(PETSC_COMM_SELF, &mfqP->subtao)); 1009 PetscCall(PetscObjectIncrementTabLevel((PetscObject)mfqP->subtao, (PetscObject)tao, 1)); 1010 PetscCall(TaoSetType(mfqP->subtao, TAOBNTR)); 1011 PetscCall(TaoSetOptionsPrefix(mfqP->subtao, "pounders_subsolver_")); 1012 PetscCall(TaoSetSolution(mfqP->subtao, mfqP->subx)); 1013 PetscCall(TaoSetObjectiveAndGradient(mfqP->subtao, NULL, pounders_fg, (void *)mfqP)); 1014 PetscCall(TaoSetMaximumIterations(mfqP->subtao, mfqP->gqt_maxits)); 1015 PetscCall(TaoSetFromOptions(mfqP->subtao)); 1016 PetscCall(TaoGetKSP(mfqP->subtao, &ksp)); 1017 if (ksp) { 1018 PetscCall(KSPGetPC(ksp, &pc)); 1019 PetscCall(PCSetType(pc, PCNONE)); 1020 } 1021 PetscCall(TaoSetVariableBounds(mfqP->subtao, mfqP->subxl, mfqP->subxu)); 1022 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mfqP->n, mfqP->n, mfqP->Hres, &mfqP->subH)); 1023 PetscCall(TaoSetHessian(mfqP->subtao, mfqP->subH, mfqP->subH, pounders_h, (void *)mfqP)); 1024 } 1025 PetscFunctionReturn(PETSC_SUCCESS); 1026 } 1027 1028 static PetscErrorCode TaoDestroy_POUNDERS(Tao tao) 1029 { 1030 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1031 PetscInt i; 1032 1033 PetscFunctionBegin; 1034 if (!mfqP->usegqt) { 1035 PetscCall(TaoDestroy(&mfqP->subtao)); 1036 PetscCall(VecDestroy(&mfqP->subx)); 1037 PetscCall(VecDestroy(&mfqP->subxl)); 1038 PetscCall(VecDestroy(&mfqP->subxu)); 1039 PetscCall(VecDestroy(&mfqP->subb)); 1040 PetscCall(VecDestroy(&mfqP->subpdel)); 1041 PetscCall(VecDestroy(&mfqP->subndel)); 1042 PetscCall(MatDestroy(&mfqP->subH)); 1043 } 1044 PetscCall(PetscFree(mfqP->Fres)); 1045 PetscCall(PetscFree(mfqP->RES)); 1046 PetscCall(PetscFree(mfqP->work)); 1047 PetscCall(PetscFree(mfqP->work2)); 1048 PetscCall(PetscFree(mfqP->work3)); 1049 PetscCall(PetscFree(mfqP->mwork)); 1050 PetscCall(PetscFree(mfqP->omega)); 1051 PetscCall(PetscFree(mfqP->beta)); 1052 PetscCall(PetscFree(mfqP->alpha)); 1053 PetscCall(PetscFree(mfqP->H)); 1054 PetscCall(PetscFree(mfqP->Q)); 1055 PetscCall(PetscFree(mfqP->Q_tmp)); 1056 PetscCall(PetscFree(mfqP->L)); 1057 PetscCall(PetscFree(mfqP->L_tmp)); 1058 PetscCall(PetscFree(mfqP->L_save)); 1059 PetscCall(PetscFree(mfqP->N)); 1060 PetscCall(PetscFree(mfqP->M)); 1061 PetscCall(PetscFree(mfqP->Z)); 1062 PetscCall(PetscFree(mfqP->tau)); 1063 PetscCall(PetscFree(mfqP->tau_tmp)); 1064 PetscCall(PetscFree(mfqP->npmaxwork)); 1065 PetscCall(PetscFree(mfqP->npmaxiwork)); 1066 PetscCall(PetscFree(mfqP->xmin)); 1067 PetscCall(PetscFree(mfqP->C)); 1068 PetscCall(PetscFree(mfqP->Fdiff)); 1069 PetscCall(PetscFree(mfqP->Disp)); 1070 PetscCall(PetscFree(mfqP->Gres)); 1071 PetscCall(PetscFree(mfqP->Hres)); 1072 PetscCall(PetscFree(mfqP->Gpoints)); 1073 PetscCall(PetscFree(mfqP->model_indices)); 1074 PetscCall(PetscFree(mfqP->last_model_indices)); 1075 PetscCall(PetscFree(mfqP->Xsubproblem)); 1076 PetscCall(PetscFree(mfqP->Gdel)); 1077 PetscCall(PetscFree(mfqP->Hdel)); 1078 PetscCall(PetscFree(mfqP->indices)); 1079 PetscCall(PetscFree(mfqP->iwork)); 1080 PetscCall(PetscFree(mfqP->w)); 1081 for (i = 0; i < mfqP->nHist; i++) { 1082 PetscCall(VecDestroy(&mfqP->Xhist[i])); 1083 PetscCall(VecDestroy(&mfqP->Fhist[i])); 1084 } 1085 PetscCall(VecDestroy(&mfqP->workxvec)); 1086 PetscCall(VecDestroy(&mfqP->workfvec)); 1087 PetscCall(PetscFree(mfqP->Xhist)); 1088 PetscCall(PetscFree(mfqP->Fhist)); 1089 1090 if (mfqP->size > 1) { 1091 PetscCall(VecDestroy(&mfqP->localx)); 1092 PetscCall(VecDestroy(&mfqP->localxmin)); 1093 PetscCall(VecDestroy(&mfqP->localf)); 1094 PetscCall(VecDestroy(&mfqP->localfmin)); 1095 } 1096 PetscCall(PetscFree(tao->data)); 1097 PetscFunctionReturn(PETSC_SUCCESS); 1098 } 1099 1100 static PetscErrorCode TaoSetFromOptions_POUNDERS(Tao tao, PetscOptionItems *PetscOptionsObject) 1101 { 1102 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1103 1104 PetscFunctionBegin; 1105 PetscOptionsHeadBegin(PetscOptionsObject, "POUNDERS method for least-squares optimization"); 1106 PetscCall(PetscOptionsReal("-tao_pounders_delta", "initial delta", "", mfqP->delta, &mfqP->delta0, NULL)); 1107 mfqP->delta = mfqP->delta0; 1108 PetscCall(PetscOptionsInt("-tao_pounders_npmax", "max number of points in model", "", mfqP->npmax, &mfqP->npmax, NULL)); 1109 PetscCall(PetscOptionsBool("-tao_pounders_gqt", "use gqt algorithm for subproblem", "", mfqP->usegqt, &mfqP->usegqt, NULL)); 1110 PetscOptionsHeadEnd(); 1111 PetscFunctionReturn(PETSC_SUCCESS); 1112 } 1113 1114 static PetscErrorCode TaoView_POUNDERS(Tao tao, PetscViewer viewer) 1115 { 1116 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1117 PetscBool isascii; 1118 1119 PetscFunctionBegin; 1120 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1121 if (isascii) { 1122 PetscCall(PetscViewerASCIIPrintf(viewer, "initial delta: %g\n", (double)mfqP->delta0)); 1123 PetscCall(PetscViewerASCIIPrintf(viewer, "final delta: %g\n", (double)mfqP->delta)); 1124 PetscCall(PetscViewerASCIIPrintf(viewer, "model points: %" PetscInt_FMT "\n", mfqP->nmodelpoints)); 1125 if (mfqP->usegqt) { 1126 PetscCall(PetscViewerASCIIPrintf(viewer, "subproblem solver: gqt\n")); 1127 } else { 1128 PetscCall(TaoView(mfqP->subtao, viewer)); 1129 } 1130 } 1131 PetscFunctionReturn(PETSC_SUCCESS); 1132 } 1133 /*MC 1134 TAOPOUNDERS - POUNDERS derivate-free model-based algorithm for nonlinear least squares 1135 1136 Options Database Keys: 1137 + -tao_pounders_delta - initial step length 1138 . -tao_pounders_npmax - maximum number of points in model 1139 - -tao_pounders_gqt - use gqt algorithm for subproblem instead of TRON 1140 1141 Level: beginner 1142 1143 M*/ 1144 1145 PETSC_EXTERN PetscErrorCode TaoCreate_POUNDERS(Tao tao) 1146 { 1147 TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data; 1148 1149 PetscFunctionBegin; 1150 tao->ops->setup = TaoSetUp_POUNDERS; 1151 tao->ops->solve = TaoSolve_POUNDERS; 1152 tao->ops->view = TaoView_POUNDERS; 1153 tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS; 1154 tao->ops->destroy = TaoDestroy_POUNDERS; 1155 1156 PetscCall(PetscNew(&mfqP)); 1157 tao->data = (void *)mfqP; 1158 /* Override default settings (unless already changed) */ 1159 if (!tao->max_it_changed) tao->max_it = 2000; 1160 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 1161 mfqP->npmax = PETSC_DEFAULT; 1162 mfqP->delta0 = 0.1; 1163 mfqP->delta = 0.1; 1164 mfqP->deltamax = 1e3; 1165 mfqP->deltamin = 1e-6; 1166 mfqP->c2 = 10.0; 1167 mfqP->theta1 = 1e-5; 1168 mfqP->theta2 = 1e-4; 1169 mfqP->gamma0 = 0.5; 1170 mfqP->gamma1 = 2.0; 1171 mfqP->eta0 = 0.0; 1172 mfqP->eta1 = 0.1; 1173 mfqP->usegqt = PETSC_FALSE; 1174 mfqP->gqt_rtol = 0.001; 1175 mfqP->gqt_maxits = 50; 1176 mfqP->workxvec = NULL; 1177 PetscFunctionReturn(PETSC_SUCCESS); 1178 } 1179