xref: /petsc/src/tao/leastsquares/impls/pounders/pounders.c (revision 036f3cb51ce7a49e97d9eeeeec1abdf1ae04bc37)
1 #include "pounders.h"
2 
3 #undef __FUNCT__
4 #define __FUNCT__ "pounders_h"
5 static PetscErrorCode pounders_h(TaoSolver subtao, Vec v, Mat *H, Mat *Hpre, MatStructure *flag, void *ctx)
6 {
7   PetscFunctionBegin;
8   *flag = SAME_NONZERO_PATTERN;
9   PetscFunctionReturn(0);
10 }
11 #undef __FUNCT__
12 #define __FUNCT__ "pounders_fg"
13 static PetscErrorCode  pounders_fg(TaoSolver subtao, Vec x, PetscReal *f, Vec g, void *ctx)
14 {
15   TAO_POUNDERS *mfqP = (TAO_POUNDERS*)ctx;
16   PetscReal d1,d2;
17   PetscErrorCode ierr;
18   PetscFunctionBegin;
19   /* g = A*x  (add b later)*/
20   ierr = MatMult(mfqP->subH,x,g); CHKERRQ(ierr);
21 
22 
23   /* f = 1/2 * x'*(Ax) + b'*x  */
24   ierr = VecDot(x,g,&d1); CHKERRQ(ierr);
25   ierr = VecDot(mfqP->subb,x,&d2); CHKERRQ(ierr);
26   *f = 0.5 *d1 + d2;
27 
28   /* now  g = g + b */
29   ierr = VecAXPY(g, 1.0, mfqP->subb); CHKERRQ(ierr);
30 
31   PetscFunctionReturn(0);
32 }
33 #undef __FUNCT__
34 #define __FUNCT__ "gqtwrap"
35 PetscErrorCode gqtwrap(TaoSolver tao,PetscReal *gnorm, PetscReal *qmin)
36 {
37     PetscErrorCode ierr;
38     PetscReal atol=1.0e-10;
39     PetscInt info,its;
40     TAO_POUNDERS *mfqP = (TAO_POUNDERS*)tao->data;
41 
42     PetscFunctionBegin;
43 
44     if (! mfqP->usegqt) {
45       PetscReal maxval;
46       PetscInt i,j;
47 
48       ierr = VecSetValues(mfqP->subb,mfqP->n,mfqP->indices,mfqP->Gres,INSERT_VALUES); CHKERRQ(ierr);
49       ierr = VecAssemblyBegin(mfqP->subb); CHKERRQ(ierr);
50       ierr = VecAssemblyEnd(mfqP->subb); CHKERRQ(ierr);
51 
52       ierr = VecSet(mfqP->subx,0.0); CHKERRQ(ierr);
53 
54       ierr = VecSet(mfqP->subndel,-mfqP->delta); CHKERRQ(ierr);
55       ierr = VecSet(mfqP->subpdel,mfqP->delta); CHKERRQ(ierr);
56 
57       for (i=0;i<mfqP->n;i++) {
58 	for (j=i;j<mfqP->n;j++) {
59 	    mfqP->Hres[j+mfqP->n*i] = mfqP->Hres[mfqP->n*j+i];
60 	}
61       }
62       ierr = MatSetValues(mfqP->subH,mfqP->n,mfqP->indices,mfqP->n,mfqP->indices,mfqP->Hres,INSERT_VALUES);
63       ierr = MatAssemblyBegin(mfqP->subH,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
64       ierr = MatAssemblyEnd(mfqP->subH,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr);
65 
66       ierr = TaoResetStatistics(mfqP->subtao); CHKERRQ(ierr);
67       ierr = TaoSetTolerances(mfqP->subtao,PETSC_DEFAULT,PETSC_DEFAULT,*gnorm,*gnorm,PETSC_DEFAULT); CHKERRQ(ierr);
68       /* enforce bound constraints -- experimental */
69       if (tao->XU && tao->XL) {
70 	ierr = VecCopy(tao->XU,mfqP->subxu); CHKERRQ(ierr);
71 	ierr = VecAXPY(mfqP->subxu,-1.0,tao->solution); CHKERRQ(ierr);
72 	ierr = VecScale(mfqP->subxu,1.0/mfqP->delta); CHKERRQ(ierr);
73 	ierr = VecCopy(tao->XL,mfqP->subxl); CHKERRQ(ierr);
74 	ierr = VecAXPY(mfqP->subxl,-1.0,tao->solution); CHKERRQ(ierr);
75 	ierr = VecScale(mfqP->subxl,1.0/mfqP->delta); CHKERRQ(ierr);
76 
77 	ierr = VecPointwiseMin(mfqP->subxu,mfqP->subxu,mfqP->subpdel); CHKERRQ(ierr);
78 	ierr = VecPointwiseMax(mfqP->subxl,mfqP->subxl,mfqP->subndel); CHKERRQ(ierr);
79       } else {
80 	ierr = VecCopy(mfqP->subpdel,mfqP->subxu); CHKERRQ(ierr);
81 	ierr = VecCopy(mfqP->subndel,mfqP->subxl); CHKERRQ(ierr);
82       }
83       /* Make sure xu > xl */
84       ierr = VecCopy(mfqP->subxl,mfqP->subpdel); CHKERRQ(ierr);
85       ierr = VecAXPY(mfqP->subpdel,-1.0,mfqP->subxu);  CHKERRQ(ierr);
86       ierr = VecMax(mfqP->subpdel,PETSC_NULL,&maxval); CHKERRQ(ierr);
87       if (maxval > 1e-10) {
88 	SETERRQ(PETSC_COMM_WORLD,1,"upper bound < lower bound in subproblem");
89       }
90       /* Make sure xu > tao->solution > xl */
91       ierr = VecCopy(mfqP->subxl,mfqP->subpdel); CHKERRQ(ierr);
92       ierr = VecAXPY(mfqP->subpdel,-1.0,mfqP->subx);  CHKERRQ(ierr);
93       ierr = VecMax(mfqP->subpdel,PETSC_NULL,&maxval); CHKERRQ(ierr);
94       if (maxval > 1e-10) {
95 	SETERRQ(PETSC_COMM_WORLD,1,"initial guess < lower bound in subproblem");
96       }
97 
98       ierr = VecCopy(mfqP->subx,mfqP->subpdel); CHKERRQ(ierr);
99       ierr = VecAXPY(mfqP->subpdel,-1.0,mfqP->subxu);  CHKERRQ(ierr);
100       ierr = VecMax(mfqP->subpdel,PETSC_NULL,&maxval); CHKERRQ(ierr);
101       if (maxval > 1e-10) {
102 	SETERRQ(PETSC_COMM_WORLD,1,"initial guess > upper bound in subproblem");
103       }
104 
105 
106       ierr = TaoSolve(mfqP->subtao); CHKERRQ(ierr);
107       ierr = TaoGetSolutionStatus(mfqP->subtao,PETSC_NULL,qmin,PETSC_NULL,PETSC_NULL,PETSC_NULL,PETSC_NULL); CHKERRQ(ierr);
108 
109       /* test bounds post-solution*/
110       ierr = VecCopy(mfqP->subxl,mfqP->subpdel); CHKERRQ(ierr);
111       ierr = VecAXPY(mfqP->subpdel,-1.0,mfqP->subx);  CHKERRQ(ierr);
112       ierr = VecMax(mfqP->subpdel,PETSC_NULL,&maxval); CHKERRQ(ierr);
113       if (maxval > 1e-5) {
114 	ierr = PetscInfo(tao,"subproblem solution < lower bound"); CHKERRQ(ierr);
115 	tao->reason = TAO_DIVERGED_TR_REDUCTION;
116       }
117 
118       ierr = VecCopy(mfqP->subx,mfqP->subpdel); CHKERRQ(ierr);
119       ierr = VecAXPY(mfqP->subpdel,-1.0,mfqP->subxu);  CHKERRQ(ierr);
120       ierr = VecMax(mfqP->subpdel,PETSC_NULL,&maxval); CHKERRQ(ierr);
121       if (maxval > 1e-5) {
122 	ierr = PetscInfo(tao,"subproblem solution > upper bound");
123 	tao->reason = TAO_DIVERGED_TR_REDUCTION;
124       }
125 
126 
127     } else {
128       gqt(mfqP->n,mfqP->Hres,mfqP->n,mfqP->Gres,1.0,mfqP->gqt_rtol,atol,
129 	mfqP->gqt_maxits,gnorm,qmin,mfqP->Xsubproblem,&info,&its,
130 	mfqP->work,mfqP->work2, mfqP->work3);
131       /*dgqt_(&mfqP->n,mfqP->Hres,&mfqP->n,mfqP->Gres,&one,&mfqP->gqt_rtol,&atol,
132 	&mfqP->gqt_maxits,gnorm,qmin,mfqP->Xsubproblem,&info,&its,
133 	mfqP->work,mfqP->work2, mfqP->work3);*/
134     }
135     *qmin *= -1;
136     PetscFunctionReturn(0);
137 }
138 
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "phi2eval"
142 PetscErrorCode phi2eval(PetscReal *x, PetscInt n, PetscReal *phi) {
143 /* Phi = .5*[x(1)^2  sqrt(2)*x(1)*x(2) ... sqrt(2)*x(1)*x(n) ... x(2)^2 sqrt(2)*x(2)*x(3) .. x(n)^2] */
144     PetscInt i,j,k;
145     PetscReal sqrt2 = PetscSqrtReal(2.0);
146     PetscFunctionBegin;
147     j=0;
148 
149     for (i=0;i<n;i++) {
150 	phi[j] = 0.5 * x[i]*x[i];
151 	j++;
152 	for (k=i+1;k<n;k++) {
153 	    phi[j]  = x[i]*x[k]/sqrt2;
154 	    j++;
155 	}
156 
157     }
158 
159     PetscFunctionReturn(0);
160 }
161 
162 #undef __FUNCT__
163 #define __FUNCT__ "getquadpounders"
164 PetscErrorCode getquadpounders(TAO_POUNDERS *mfqP) {
165 /* Computes the parameters of the quadratic Q(x) = c + g'*x + 0.5*x*G*x'
166    that satisfies the interpolation conditions Q(X[:,j]) = f(j)
167    for j=1,...,m and with a Hessian matrix of least Frobenius norm */
168 
169     /* NB --we are ignoring c */
170     PetscInt i,j,k,num,np = mfqP->nmodelpoints;
171     PetscReal one = 1.0,zero=0.0,negone=-1.0;
172     PetscBLASInt blasnpmax = mfqP->npmax;
173     PetscBLASInt blasnplus1 = mfqP->n+1;
174     PetscBLASInt blasnp = np;
175     PetscBLASInt blasint = mfqP->n*(mfqP->n+1) / 2;
176     PetscBLASInt blasint2 = np - mfqP->n-1;
177     PetscBLASInt blasinfo,ione=1;
178     PetscReal sqrt2 = PetscSqrtReal(2.0);
179 
180     PetscFunctionBegin;
181 
182     for (i=0;i<mfqP->n*mfqP->m;i++) {
183 	mfqP->Gdel[i] = 0;
184     }
185     for (i=0;i<mfqP->n*mfqP->n*mfqP->m;i++) {
186 	mfqP->Hdel[i] = 0;
187     }
188 
189     /* factor M */
190     LAPACKgetrf_(&blasnplus1,&blasnpmax,mfqP->M,&blasnplus1,mfqP->npmaxiwork,&blasinfo);
191     if (blasinfo != 0) {
192 	SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine getrf returned with value %D\n",blasinfo);
193     }
194 
195 
196     if (np == mfqP->n+1) {
197       for (i=0;i<mfqP->npmax-mfqP->n-1;i++) {
198 	mfqP->omega[i]=0.0;
199       }
200       for (i=0;i<mfqP->n*(mfqP->n+1)/2;i++) {
201 	mfqP->beta[i]=0.0;
202       }
203     } else {
204       /* Let Ltmp = (L'*L) */
205       BLASgemm_("T","N",&blasint2,&blasint2,&blasint,&one,&mfqP->L[(mfqP->n+1)*blasint],&blasint,&mfqP->L[(mfqP->n+1)*blasint],&blasint,&zero,mfqP->L_tmp,&blasint);
206 
207       /* factor Ltmp */
208       LAPACKpotrf_("L",&blasint2,mfqP->L_tmp,&blasint,&blasinfo);
209       if (blasinfo != 0) {
210 	SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine potrf returned with value %D\n",blasinfo);
211       }
212     }
213 
214     for (k=0;k<mfqP->m;k++) {
215 
216       if (np != mfqP->n+1) {
217 	/* Solve L'*L*Omega = Z' * RESk*/
218 	BLASgemv_("T",&blasnp,&blasint2,&one,mfqP->Z,&blasnpmax,&mfqP->RES[mfqP->npmax*k],&ione,&zero,mfqP->omega,&ione);
219 
220 	LAPACKpotrs_("L",&blasint2,&ione,mfqP->L_tmp,&blasint,mfqP->omega,&blasint2,&blasinfo);
221 	if (blasinfo != 0) {
222 	  SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine potrs returned with value %D\n",blasinfo);
223 	}
224 
225 	/* Beta = L*Omega */
226 	BLASgemv_("N",&blasint,&blasint2,&one,&mfqP->L[(mfqP->n+1)*blasint],&blasint,mfqP->omega,&ione,&zero,mfqP->beta,&ione);
227       }
228 
229 
230       /* solve M'*Alpha = RESk - N'*Beta */
231       BLASgemv_("T",&blasint,&blasnp,&negone,mfqP->N,&blasint,mfqP->beta,&ione,&one,&mfqP->RES[mfqP->npmax*k],&ione);
232       LAPACKgetrs_("T",&blasnplus1,&ione,mfqP->M,&blasnplus1,mfqP->npmaxiwork,&mfqP->RES[mfqP->npmax*k],&blasnplus1,&blasinfo);
233       if (blasinfo != 0) {
234 	SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine getrs returned with value %D\n",blasinfo);
235       }
236 
237       /* Gdel(:,k) = Alpha(2:n+1) */
238       for (i=0;i<mfqP->n;i++) {
239 	mfqP->Gdel[i + mfqP->n*k] = mfqP->RES[mfqP->npmax*k + i+1];
240       }
241 
242       /* Set Hdels */
243       num=0;
244       for (i=0;i<mfqP->n;i++) {
245 	/* H[i,i,k] = Beta(num) */
246 	mfqP->Hdel[(i*mfqP->n + i)*mfqP->m + k] = mfqP->beta[num];
247 	num++;
248 	for (j=i+1;j<mfqP->n;j++) {
249 	  /* H[i,j,k] = H[j,i,k] = Beta(num)/sqrt(2) */
250 	  mfqP->Hdel[(j*mfqP->n + i)*mfqP->m + k] = mfqP->beta[num]/sqrt2;
251 	  mfqP->Hdel[(i*mfqP->n + j)*mfqP->m + k] = mfqP->beta[num]/sqrt2;
252 	  num++;
253 	}
254       }
255     }
256 
257     PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNCT__
261 #define __FUNCT__ "morepoints"
262 PetscErrorCode morepoints(TAO_POUNDERS *mfqP) {
263     /* Assumes mfqP->model_indices[0]  is minimum index
264        Finishes adding points to mfqP->model_indices (up to npmax)
265        Computes L,Z,M,N
266        np is actual number of points in model (should equal npmax?) */
267     PetscInt point,i,j,offset;
268     PetscInt reject;
269     PetscBLASInt blasn=mfqP->n,blasnpmax=mfqP->npmax,blasnplus1=mfqP->n+1,blasinfo,blasnmax=mfqP->nmax,blasint,blasint2,blasnp,blasmaxmn;
270     PetscReal *x,normd;
271     PetscErrorCode ierr;
272     PetscFunctionBegin;
273 
274     /* Initialize M,N */
275     for (i=0;i<mfqP->n+1;i++) {
276 	ierr = VecGetArray(mfqP->Xhist[mfqP->model_indices[i]],&x); CHKERRQ(ierr);
277 	mfqP->M[(mfqP->n+1)*i] = 1.0;
278 	for (j=0;j<mfqP->n;j++) {
279 	    mfqP->M[j+1+((mfqP->n+1)*i)] = (x[j]  - mfqP->xmin[j]) / mfqP->delta;
280 	}
281 	ierr = VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]],&x); CHKERRQ(ierr);
282 	ierr = phi2eval(&mfqP->M[1+((mfqP->n+1)*i)],mfqP->n,&mfqP->N[mfqP->n*(mfqP->n+1)/2 * i]); CHKERRQ(ierr);
283 
284 
285 
286     }
287 
288     /* Now we add points until we have npmax starting with the most recent ones */
289     point = mfqP->nHist-1;
290     mfqP->nmodelpoints = mfqP->n+1;
291 
292     while (mfqP->nmodelpoints < mfqP->npmax && point>=0) {
293 	/* Reject any points already in the model */
294 	reject = 0;
295 	for (j=0;j<mfqP->n+1;j++) {
296 	    if (point == mfqP->model_indices[j]) {
297 		reject = 1;
298 		break;
299 	    }
300 	}
301 
302 	/* Reject if norm(d) >c2 */
303 	if (!reject) {
304 	    ierr = VecCopy(mfqP->Xhist[point],mfqP->workxvec); CHKERRQ(ierr);
305 	    ierr = VecAXPY(mfqP->workxvec,-1.0,mfqP->Xhist[mfqP->minindex]); CHKERRQ(ierr);
306 	    ierr = VecNorm(mfqP->workxvec,NORM_2,&normd); CHKERRQ(ierr);
307 	    normd /= mfqP->delta;
308 	    if (normd > mfqP->c2) {
309 		reject =1;
310 	    }
311 	}
312 	if (reject)
313 	{
314 	    point--;
315 	    continue;
316 	}
317 
318 
319 	ierr = VecGetArray(mfqP->Xhist[point],&x); CHKERRQ(ierr);
320 	mfqP->M[(mfqP->n+1)*mfqP->nmodelpoints] = 1.0;
321 	for (j=0;j<mfqP->n;j++) {
322 	    mfqP->M[j+1+((mfqP->n+1)*mfqP->nmodelpoints)] = (x[j]  - mfqP->xmin[j]) / mfqP->delta;
323 	}
324 
325 	ierr = VecRestoreArray(mfqP->Xhist[point],&x); CHKERRQ(ierr);
326 	ierr = phi2eval(&mfqP->M[1+(mfqP->n+1)*mfqP->nmodelpoints],mfqP->n,&mfqP->N[mfqP->n*(mfqP->n+1)/2 * (mfqP->nmodelpoints)]); CHKERRQ(ierr);
327 
328 	/* Update QR factorization */
329 
330 	/* Copy M' to Q_tmp */
331 	for (i=0;i<mfqP->n+1;i++) {
332 	    for (j=0;j<mfqP->npmax;j++) {
333 		mfqP->Q_tmp[j+mfqP->npmax*i] = mfqP->M[i+(mfqP->n+1)*j];
334 	    }
335 	}
336 	blasnp = mfqP->nmodelpoints+1;
337 	/* Q_tmp,R = qr(M') */
338 	blasmaxmn=PetscMax(mfqP->m,mfqP->n+1);
339 	LAPACKgeqrf_(&blasnp,&blasnplus1,mfqP->Q_tmp,&blasnpmax,mfqP->tau_tmp,mfqP->mwork,&blasmaxmn,&blasinfo);
340 
341 	if (blasinfo != 0) {
342 	    SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine geqrf returned with value %D\n",blasinfo);
343 	}
344 
345 	/* Reject if min(svd(N*Q(:,n+2:np+1)) <= theta2 */
346 	/* L = N*Qtmp */
347 	blasint2 = mfqP->n * (mfqP->n+1) / 2;
348 	/* Copy N to L_tmp */
349 	for (i=0;i<mfqP->n*(mfqP->n+1)/2 * mfqP->npmax;i++) {
350 	    mfqP->L_tmp[i]= mfqP->N[i];
351 	}
352 
353 	/* Copy L_save to L_tmp */
354 
355 	/* L_tmp = N*Qtmp' */
356 	LAPACKormqr_("R","N",&blasint2,&blasnp,&blasnplus1,mfqP->Q_tmp,&blasnpmax,mfqP->tau_tmp,
357 		     mfqP->L_tmp,&blasint2,mfqP->npmaxwork,&blasnmax,&blasinfo);
358 
359 	if (blasinfo != 0) {
360 	    SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine ormqr returned with value %D\n",blasinfo);
361 	}
362 
363 	/* Copy L_tmp to L_save */
364 	for (i=0;i<mfqP->npmax * mfqP->n*(mfqP->n+1)/2;i++) {
365 	    mfqP->L_save[i] = mfqP->L_tmp[i];
366 	}
367 
368 	/* Get svd for L_tmp(:,n+2:np+1) (L_tmp is modified in process) */
369 	blasint = mfqP->nmodelpoints - mfqP->n;
370 	LAPACKgesvd_("N","N",&blasint2,&blasint,&mfqP->L_tmp[(mfqP->n+1)*blasint2],&blasint2,
371 		     mfqP->beta,mfqP->work,&blasn,mfqP->work,&blasn,mfqP->npmaxwork,&blasnmax,
372 		     &blasinfo);
373 	if (blasinfo != 0) {
374 	    SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine gesvd returned with value %D\n",blasinfo);
375 	}
376 
377 	if (mfqP->beta[PetscMin(blasint,blasint2)-1] > mfqP->theta2) {
378 	    /* accept point */
379 	    mfqP->model_indices[mfqP->nmodelpoints] = point;
380 	    /* Copy Q_tmp to Q */
381 	    for (i=0;i<mfqP->npmax* mfqP->npmax;i++) {
382 		mfqP->Q[i] = mfqP->Q_tmp[i];
383 	    }
384 	    for (i=0;i<mfqP->npmax;i++){
385 		mfqP->tau[i] = mfqP->tau_tmp[i];
386 	    }
387 	    mfqP->nmodelpoints++;
388 	    blasnp = mfqP->nmodelpoints+1;
389 
390 	    /* Copy L_save to L */
391 	    for (i=0;i<mfqP->npmax * mfqP->n*(mfqP->n+1)/2;i++) {
392 		mfqP->L[i] = mfqP->L_save[i];
393 	    }
394 	}
395 	point--;
396 
397     }
398 
399 
400     blasnp = mfqP->nmodelpoints;
401     /* Copy Q(:,n+2:np) to Z */
402     /* First set Q_tmp to I */
403     for (i=0;i<mfqP->npmax*mfqP->npmax;i++) {
404 	mfqP->Q_tmp[i] = 0.0;
405     }
406     for (i=0;i<mfqP->npmax;i++) {
407 	mfqP->Q_tmp[i + mfqP->npmax*i] = 1.0;
408     }
409 
410     /* Q_tmp = I * Q */
411     LAPACKormqr_("R","N",&blasnp,&blasnp,&blasnplus1,mfqP->Q,&blasnpmax,mfqP->tau,mfqP->Q_tmp,&blasnpmax,mfqP->npmaxwork,&blasnmax,&blasinfo);
412 
413     if (blasinfo != 0) {
414 	SETERRQ1(PETSC_COMM_SELF,1,"LAPACK routine ormqr returned with value %D\n",blasinfo);
415     }
416 
417     /* Copy Q_tmp(:,n+2:np) to Z) */
418     offset = mfqP->npmax * (mfqP->n+1);
419     for (i=offset;i<mfqP->npmax*mfqP->npmax;i++) {
420 	mfqP->Z[i-offset] = mfqP->Q_tmp[i];
421     }
422 
423 
424     if (mfqP->nmodelpoints == mfqP->n + 1) {
425     /* Set L to I_{n+1} */
426       for (i=0;i<mfqP->npmax * mfqP->n*(mfqP->n+1)/2;i++) {
427 	mfqP->L[i] = 0.0;
428       }
429       for (i=0;i<mfqP->n;i++) {
430 	mfqP->L[(mfqP->n*(mfqP->n+1)/2)*i + i] = 1.0;
431       }
432 
433     }
434 
435     PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNCT__
439 #define __FUNCT__ "addpoint"
440 /* Only call from modelimprove, addpoint() needs ->Q_tmp and ->work to be set */
441 PetscErrorCode addpoint(TaoSolver tao, TAO_POUNDERS *mfqP, PetscInt index)
442 {
443   PetscErrorCode ierr;
444   PetscFunctionBegin;
445   /* Create new vector in history: X[newidx] = X[mfqP->index] + delta*X[index]*/
446 
447   ierr = VecDuplicate(mfqP->Xhist[0],&mfqP->Xhist[mfqP->nHist]); CHKERRQ(ierr);
448   ierr = VecSetValues(mfqP->Xhist[mfqP->nHist],mfqP->n,mfqP->indices,&mfqP->Q_tmp[index*mfqP->npmax],INSERT_VALUES); CHKERRQ(ierr);
449   ierr = VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]); CHKERRQ(ierr);
450   ierr = VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]); CHKERRQ(ierr);
451   ierr = VecAYPX(mfqP->Xhist[mfqP->nHist],mfqP->delta,mfqP->Xhist[mfqP->minindex]); CHKERRQ(ierr);
452 
453   /* Project into feasible region */
454   if (tao->XU && tao->XL) {
455     ierr = VecMedian(mfqP->Xhist[mfqP->nHist], tao->XL, tao->XU,
456 		     mfqP->Xhist[mfqP->nHist]); CHKERRQ(ierr);
457   }
458 
459   /* Compute value of new vector */
460   ierr = VecDuplicate(mfqP->Fhist[0],&mfqP->Fhist[mfqP->nHist]); CHKERRQ(ierr);
461   CHKMEMQ;
462   ierr = TaoComputeSeparableObjective(tao,mfqP->Xhist[mfqP->nHist],mfqP->Fhist[mfqP->nHist]); CHKERRQ(ierr);
463   ierr = VecNorm(mfqP->Fhist[mfqP->nHist],NORM_2,&mfqP->Fres[mfqP->nHist]); CHKERRQ(ierr);
464   if (PetscIsInfOrNanReal(mfqP->Fres[mfqP->nHist])) {
465     SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
466   }
467   mfqP->Fres[mfqP->nHist]*=mfqP->Fres[mfqP->nHist];
468 
469   /* Add new vector to model */
470   mfqP->model_indices[mfqP->nmodelpoints] = mfqP->nHist;
471   mfqP->nmodelpoints++;
472   mfqP->nHist++;
473 
474 
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "modelimprove"
480 PetscErrorCode modelimprove(TaoSolver tao, TAO_POUNDERS *mfqP, PetscInt addallpoints) {
481   /* modeld = Q(:,np+1:n)' */
482   PetscErrorCode ierr;
483   PetscInt i,j,minindex=0;
484   PetscReal dp,half=0.5,one=1.0,minvalue=TAO_INFINITY;
485   PetscBLASInt blasn=mfqP->n,  blasnpmax = mfqP->npmax, blask,info;
486   PetscBLASInt blas1=1,blasnmax = mfqP->nmax;
487   blask = mfqP->nmodelpoints;
488 
489 
490   /* Qtmp = I(n x n) */
491   for (i=0;i<mfqP->n;i++) {
492     for (j=0;j<mfqP->n;j++) {
493       mfqP->Q_tmp[i + mfqP->npmax*j] = 0.0;
494     }
495   }
496   for (j=0;j<mfqP->n;j++) {
497     mfqP->Q_tmp[j + mfqP->npmax*j] = 1.0;
498   }
499 
500 
501   /* Qtmp = Q * I */
502   LAPACKormqr_("R","N",&blasn,&blasn,&blask,mfqP->Q,&blasnpmax,
503 	       mfqP->tau, mfqP->Q_tmp, &blasnpmax, mfqP->npmaxwork,&blasnmax, &info);
504 
505   for (i=mfqP->nmodelpoints;i<mfqP->n;i++) {
506     dp = BLASdot_(&blasn,&mfqP->Q_tmp[i*mfqP->npmax],&blas1,mfqP->Gres,&blas1);
507     if (dp>0.0) { /* Model says use the other direction! */
508       for (j=0;j<mfqP->n;j++) {
509 	mfqP->Q_tmp[i*mfqP->npmax+j] *= -1;
510       }
511     }
512     /* mfqP->work[i] = Cres+Modeld(i,:)*(Gres+.5*Hres*Modeld(i,:)') */
513     for (j=0;j<mfqP->n;j++) {
514       mfqP->work2[j] = mfqP->Gres[j];
515     }
516     BLASgemv_("N",&blasn,&blasn,&half,mfqP->Hres,&blasn,
517 	      &mfqP->Q_tmp[i*mfqP->npmax], &blas1, &one, mfqP->work2,&blas1);
518     mfqP->work[i] = BLASdot_(&blasn,&mfqP->Q_tmp[i*mfqP->npmax],&blas1,
519 			     mfqP->work2,&blas1);
520     if (i==mfqP->nmodelpoints || mfqP->work[i] < minvalue) {
521       minindex=i;
522       minvalue = mfqP->work[i];
523     }
524 
525     if (addallpoints != 0) {
526 	ierr = addpoint(tao,mfqP,i); CHKERRQ(ierr);
527     }
528 
529   }
530 
531   if (!addallpoints) {
532       ierr = addpoint(tao,mfqP,minindex); CHKERRQ(ierr);
533   }
534   PetscFunctionReturn(0);
535 }
536 
537 
538 #undef __FUNCT__
539 #define __FUNCT__ "affpoints"
540 PetscErrorCode affpoints(TAO_POUNDERS *mfqP, PetscReal *xmin,
541 				PetscReal c) {
542     PetscInt i,j;
543     PetscBLASInt blasm=mfqP->m,blasj,blask,blasn=mfqP->n,ione=1,info;
544     PetscBLASInt blasnpmax = mfqP->npmax,blasmaxmn;
545     PetscReal proj,normd;
546     PetscReal *x;
547     PetscErrorCode ierr;
548     PetscFunctionBegin;
549 
550     for (i=mfqP->nHist-1;i>=0;i--) {
551 	ierr = VecGetArray(mfqP->Xhist[i],&x); CHKERRQ(ierr);
552 	for (j=0;j<mfqP->n;j++) {
553 	    mfqP->work[j] = (x[j] - xmin[j])/mfqP->delta;
554 	}
555 	ierr = VecRestoreArray(mfqP->Xhist[i],&x); CHKERRQ(ierr);
556 	BLAScopy_(&blasn,mfqP->work,&ione,mfqP->work2,&ione);
557 	normd = BLASnrm2_(&blasn,mfqP->work,&ione);
558 	if (normd <= c*c) {
559 	  blasj=(mfqP->n - mfqP->nmodelpoints);
560 	    if (!mfqP->q_is_I) {
561 		/* project D onto null */
562 		blask=(mfqP->nmodelpoints);
563 		LAPACKormqr_("R","N",&ione,&blasn,&blask,mfqP->Q,&blasnpmax,mfqP->tau,
564 			     mfqP->work2,&ione,mfqP->mwork,&blasm,&info);
565 
566 		if (info < 0) {
567 		    SETERRQ1(PETSC_COMM_SELF,1,"ormqr returned value %D\n",info);
568 		}
569 	    }
570 	    proj = BLASnrm2_(&blasj,&mfqP->work2[mfqP->nmodelpoints],&ione);
571 
572 	    if (proj >= mfqP->theta1) { /* add this index to model */
573 		mfqP->model_indices[mfqP->nmodelpoints]=i;
574 		mfqP->nmodelpoints++;
575 		BLAScopy_(&blasn,mfqP->work,&ione,&mfqP->Q_tmp[mfqP->npmax*(mfqP->nmodelpoints-1)],&ione);
576 		blask=mfqP->npmax*(mfqP->nmodelpoints);
577 		BLAScopy_(&blask,mfqP->Q_tmp,&ione,mfqP->Q,&ione);
578 		blask = mfqP->nmodelpoints;
579 		blasmaxmn = PetscMax(mfqP->m,mfqP->n);
580 		LAPACKgeqrf_(&blasn,&blask,mfqP->Q,&blasnpmax,mfqP->tau,mfqP->mwork,
581 			     &blasmaxmn,&info);
582 		mfqP->q_is_I = 0;
583 		if (info < 0) {
584 		    SETERRQ1(PETSC_COMM_SELF,1,"geqrf returned value %D\n",info);
585 		}
586 
587 
588 	    }
589 	    if (mfqP->nmodelpoints == mfqP->n)  {
590 		break;
591 	    }
592 	}
593     }
594 
595     PetscFunctionReturn(0);
596 }
597 #undef __FUNCT__
598 #define __FUNCT__ "TaoSolve_POUNDERS"
599 static PetscErrorCode TaoSolve_POUNDERS(TaoSolver tao)
600 {
601   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
602 
603   PetscInt i,ii,j,k,l,iter=0;
604   PetscReal step=1.0;
605   TaoSolverTerminationReason reason = TAO_CONTINUE_ITERATING;
606 
607   PetscInt low,high;
608   PetscReal minnorm;
609   PetscReal *x,*f,*fmin,*xmint;
610   PetscReal cres,deltaold;
611   PetscReal gnorm;
612   PetscBLASInt info,ione=1,iblas;
613   PetscBool valid;
614   PetscReal mdec, rho, normxsp;
615   PetscReal one=1.0,zero=0.0,ratio;
616   PetscBLASInt blasm,blasn,blasnpmax,blasn2;
617   PetscErrorCode ierr;
618 
619 
620   /* n = # of parameters
621      m = dimension (components) of function  */
622 
623   PetscFunctionBegin;
624 
625   if (tao->XL && tao->XU) {
626     /* Check x0 <= XU */
627     PetscReal val;
628     ierr = VecCopy(tao->solution,mfqP->Xhist[0]); CHKERRQ(ierr);
629     ierr = VecAXPY(mfqP->Xhist[0],-1.0,tao->XU); CHKERRQ(ierr);
630     ierr = VecMax(mfqP->Xhist[0],PETSC_NULL,&val); CHKERRQ(ierr);
631     if (val > 1e-10) {
632       SETERRQ(PETSC_COMM_WORLD,1,"X0 > upper bound");
633     }
634 
635     /* Check x0 >= xl */
636     ierr = VecCopy(tao->XL,mfqP->Xhist[0]); CHKERRQ(ierr);
637     ierr = VecAXPY(mfqP->Xhist[0],-1.0,tao->solution); CHKERRQ(ierr);
638     ierr = VecMax(mfqP->Xhist[0],PETSC_NULL,&val); CHKERRQ(ierr);
639     if (val > 1e-10) {
640       SETERRQ(PETSC_COMM_WORLD,1,"X0 < lower bound");
641     }
642     /* Check x0 + delta < XU  -- should be able to get around this eventually */
643 
644     ierr = VecSet(mfqP->Xhist[0],mfqP->delta); CHKERRQ(ierr);
645     ierr = VecAXPY(mfqP->Xhist[0],1.0,tao->solution); CHKERRQ(ierr);
646     ierr = VecAXPY(mfqP->Xhist[0],-1.0,tao->XU); CHKERRQ(ierr);
647     ierr = VecMax(mfqP->Xhist[0],PETSC_NULL,&val); CHKERRQ(ierr);
648     if (val > 1e-10) {
649       SETERRQ(PETSC_COMM_WORLD,1,"X0 + delta > upper bound");
650     }
651 
652   }
653 
654   CHKMEMQ;
655   blasm = mfqP->m; blasn=mfqP->n; blasnpmax = mfqP->npmax;
656   for (i=0;i<mfqP->n*mfqP->n*mfqP->m;i++) mfqP->H[i]=0;
657 
658   ierr = VecCopy(tao->solution,mfqP->Xhist[0]); CHKERRQ(ierr);
659   CHKMEMQ;
660   ierr = TaoComputeSeparableObjective(tao,tao->solution,mfqP->Fhist[0]); CHKERRQ(ierr);
661 
662   ierr = VecNorm(mfqP->Fhist[0],NORM_2,&mfqP->Fres[0]); CHKERRQ(ierr);
663   if (PetscIsInfOrNanReal(mfqP->Fres[0])) {
664     SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
665   }
666   mfqP->Fres[0]*=mfqP->Fres[0];
667   mfqP->minindex = 0;
668   minnorm = mfqP->Fres[0];
669   ierr = VecGetOwnershipRange(mfqP->Xhist[0],&low,&high); CHKERRQ(ierr);
670   for (i=1;i<mfqP->n+1;i++) {
671       ierr = VecCopy(tao->solution,mfqP->Xhist[i]); CHKERRQ(ierr);
672       if (i-1 >= low && i-1 < high) {
673 	  ierr = VecGetArray(mfqP->Xhist[i],&x); CHKERRQ(ierr);
674 	  x[i-1-low] += mfqP->delta;
675 	  ierr = VecRestoreArray(mfqP->Xhist[i],&x); CHKERRQ(ierr);
676       }
677       CHKMEMQ;
678       ierr = TaoComputeSeparableObjective(tao,mfqP->Xhist[i],mfqP->Fhist[i]); CHKERRQ(ierr);
679       ierr = VecNorm(mfqP->Fhist[i],NORM_2,&mfqP->Fres[i]); CHKERRQ(ierr);
680       if (PetscIsInfOrNanReal(mfqP->Fres[i])) {
681 	SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
682       }
683       mfqP->Fres[i]*=mfqP->Fres[i];
684       if (mfqP->Fres[i] < minnorm) {
685 	  mfqP->minindex = i;
686 	  minnorm = mfqP->Fres[i];
687       }
688   }
689 
690   ierr = VecCopy(mfqP->Xhist[mfqP->minindex],tao->solution); CHKERRQ(ierr);
691   ierr = VecCopy(mfqP->Fhist[mfqP->minindex],tao->sep_objective); CHKERRQ(ierr);
692   /* Gather mpi vecs to one big local vec */
693 
694 
695 
696   /* Begin serial code */
697 
698   /* Disp[i] = Xi-xmin, i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
699   /* Fdiff[i] = (Fi-Fmin)', i=1,..,mfqP->minindex-1,mfqP->minindex+1,..,n */
700   /* (Column oriented for blas calls) */
701   ii=0;
702 
703   if (mfqP->mpisize == 1) {
704       ierr = VecGetArray(mfqP->Xhist[mfqP->minindex],&xmint); CHKERRQ(ierr);
705       for (i=0;i<mfqP->n;i++) mfqP->xmin[i] = xmint[i];
706       ierr = VecRestoreArray(mfqP->Xhist[mfqP->minindex],&xmint); CHKERRQ(ierr);
707       ierr = VecGetArray(mfqP->Fhist[mfqP->minindex],&fmin); CHKERRQ(ierr);
708       for (i=0;i<mfqP->n+1;i++) {
709 	  if (i == mfqP->minindex) continue;
710 
711 	  ierr = VecGetArray(mfqP->Xhist[i],&x); CHKERRQ(ierr);
712 	  for (j=0;j<mfqP->n;j++) {
713 	      mfqP->Disp[ii+mfqP->npmax*j] = (x[j] - mfqP->xmin[j])/mfqP->delta;
714 	  }
715 	  ierr = VecRestoreArray(mfqP->Xhist[i],&x); CHKERRQ(ierr);
716 
717 	  ierr = VecGetArray(mfqP->Fhist[i],&f); CHKERRQ(ierr);
718 	  for (j=0;j<mfqP->m;j++) {
719 	      mfqP->Fdiff[ii+mfqP->n*j] = f[j] - fmin[j];
720 	  }
721 	  ierr = VecRestoreArray(mfqP->Fhist[i],&f); CHKERRQ(ierr);
722 	  mfqP->model_indices[ii++] = i;
723 
724       }
725       for (j=0;j<mfqP->m;j++) {
726 	  mfqP->C[j] = fmin[j];
727       }
728       ierr = VecRestoreArray(mfqP->Fhist[mfqP->minindex],&fmin); CHKERRQ(ierr);
729 
730   } else {
731       ierr = VecScatterBegin(mfqP->scatterx,mfqP->Xhist[mfqP->minindex],mfqP->localxmin,INSERT_VALUES,SCATTER_FORWARD); CHKERRQ(ierr);
732       ierr = VecScatterEnd(mfqP->scatterx,mfqP->Xhist[mfqP->minindex],mfqP->localxmin,INSERT_VALUES,SCATTER_FORWARD); CHKERRQ(ierr);
733       ierr = VecGetArray(mfqP->localxmin,&xmint); CHKERRQ(ierr);
734       for (i=0;i<mfqP->n;i++) mfqP->xmin[i] = xmint[i];
735       ierr = VecRestoreArray(mfqP->localxmin,&mfqP->xmin); CHKERRQ(ierr);
736 
737 
738 
739       ierr = VecScatterBegin(mfqP->scatterf,mfqP->Fhist[mfqP->minindex],mfqP->localfmin,INSERT_VALUES,SCATTER_FORWARD); CHKERRQ(ierr);
740       ierr = VecScatterEnd(mfqP->scatterf,mfqP->Fhist[mfqP->minindex],mfqP->localfmin,INSERT_VALUES,SCATTER_FORWARD); CHKERRQ(ierr);
741       ierr = VecGetArray(mfqP->localfmin,&fmin); CHKERRQ(ierr);
742       for (i=0;i<mfqP->n+1;i++) {
743 	  if (i == mfqP->minindex) continue;
744 
745 	  mfqP->model_indices[ii++] = i;
746 	  ierr = VecScatterBegin(mfqP->scatterx,mfqP->Xhist[ii],mfqP->localx,
747 				 INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr);
748 	  ierr = VecScatterEnd(mfqP->scatterx,mfqP->Xhist[ii],mfqP->localx,
749 				 INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr);
750 	  ierr = VecGetArray(mfqP->localx,&x); CHKERRQ(ierr);
751 	  for (j=0;j<mfqP->n;j++) {
752 	      mfqP->Disp[i+mfqP->npmax*j] = (x[j] - mfqP->xmin[j])/mfqP->delta;
753 	  }
754 	  ierr = VecRestoreArray(mfqP->localx,&x); CHKERRQ(ierr);
755 
756 
757 	  ierr = VecScatterBegin(mfqP->scatterf,mfqP->Fhist[ii],mfqP->localf,
758 				 INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr);
759 	  ierr = VecScatterEnd(mfqP->scatterf,mfqP->Fhist[ii],mfqP->localf,
760 				 INSERT_VALUES, SCATTER_FORWARD); CHKERRQ(ierr);
761 	  ierr = VecGetArray(mfqP->localf,&f); CHKERRQ(ierr);
762 	  for (j=0;j<mfqP->m;j++) {
763 	      mfqP->Fdiff[i*mfqP->n+j] = f[j] - fmin[j];
764 
765 	  }
766 	  ierr = VecRestoreArray(mfqP->localf,&f); CHKERRQ(ierr);
767       }
768       for (j=0;j<mfqP->m;j++) {
769 	  mfqP->C[j] = fmin[j];
770       }
771 
772       ierr = VecRestoreArray(mfqP->localfmin,&fmin); CHKERRQ(ierr);
773 
774   }
775 
776 
777   /* Determine the initial quadratic models */
778 
779   /* G = D(ModelIn,:) \ (F(ModelIn,1:m)-repmat(F(xkin,1:m),n,1)); */
780   /* D (nxn) Fdiff (nxm)  => G (nxm) */
781   blasn2 = blasn;
782   LAPACKgesv_(&blasn,&blasm,mfqP->Disp,&blasnpmax,mfqP->iwork,mfqP->Fdiff,&blasn2,&info);
783   ierr = PetscInfo1(tao,"gesv returned %D\n",info); CHKERRQ(ierr);
784 
785   cres = minnorm;
786   /* Gres = G*F(xkin,1:m)'  G (nxm)   Fk (m)   */
787   BLASgemv_("N",&blasn,&blasm,&one,mfqP->Fdiff,&blasn,mfqP->C,&ione,&zero,mfqP->Gres,&ione);
788 
789   /*  Hres = G*G'  */
790   BLASgemm_("N","T",&blasn,&blasn,&blasm,&one,mfqP->Fdiff, &blasn,mfqP->Fdiff,&blasn,&zero,mfqP->Hres,&blasn);
791 
792   valid = PETSC_TRUE;
793 
794   ierr = VecSetValues(tao->gradient,mfqP->n,mfqP->indices,mfqP->Gres,INSERT_VALUES); CHKERRQ(ierr);
795   ierr = VecAssemblyBegin(tao->gradient);
796   ierr = VecAssemblyEnd(tao->gradient);
797   ierr = VecNorm(tao->gradient,NORM_2,&gnorm); CHKERRQ(ierr);
798   gnorm *= mfqP->delta;
799   ierr = VecCopy(mfqP->Xhist[mfqP->minindex],tao->solution); CHKERRQ(ierr);
800   ierr = TaoMonitor(tao, iter, minnorm, gnorm, 0.0, step, &reason); CHKERRQ(ierr);
801   mfqP->nHist = mfqP->n+1;
802   mfqP->nmodelpoints = mfqP->n+1;
803 
804   while (reason == TAO_CONTINUE_ITERATING) {
805 
806     PetscReal gnm = 1e-4;
807     iter++;
808     /* Solve the subproblem min{Q(s): ||s|| <= delta} */
809     ierr = gqtwrap(tao,&gnm,&mdec); CHKERRQ(ierr);
810     /* Evaluate the function at the new point */
811 
812     for (i=0;i<mfqP->n;i++) {
813 	mfqP->work[i] = mfqP->Xsubproblem[i]*mfqP->delta + mfqP->xmin[i];
814     }
815     ierr = VecDuplicate(tao->solution,&mfqP->Xhist[mfqP->nHist]); CHKERRQ(ierr);
816     ierr = VecDuplicate(tao->sep_objective,&mfqP->Fhist[mfqP->nHist]); CHKERRQ(ierr);
817     ierr = VecSetValues(mfqP->Xhist[mfqP->nHist],mfqP->n,mfqP->indices,mfqP->work,INSERT_VALUES); CHKERRQ(ierr);
818     ierr = VecAssemblyBegin(mfqP->Xhist[mfqP->nHist]); CHKERRQ(ierr);
819     ierr = VecAssemblyEnd(mfqP->Xhist[mfqP->nHist]); CHKERRQ(ierr);
820 
821     ierr = TaoComputeSeparableObjective(tao,mfqP->Xhist[mfqP->nHist],mfqP->Fhist[mfqP->nHist]); CHKERRQ(ierr);
822     ierr = VecNorm(mfqP->Fhist[mfqP->nHist],NORM_2,&mfqP->Fres[mfqP->nHist]); CHKERRQ(ierr);
823     if (PetscIsInfOrNanReal(mfqP->Fres[mfqP->nHist])) {
824       SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
825     }
826     mfqP->Fres[mfqP->nHist]*=mfqP->Fres[mfqP->nHist];
827     rho = (mfqP->Fres[mfqP->minindex] - mfqP->Fres[mfqP->nHist]) / mdec;
828     mfqP->nHist++;
829 
830     /* Update the center */
831     if ((rho >= mfqP->eta1) || (rho > mfqP->eta0 && valid==PETSC_TRUE)) {
832 	/* Update model to reflect new base point */
833 	for (i=0;i<mfqP->n;i++) {
834 	    mfqP->work[i] = (mfqP->work[i] - mfqP->xmin[i])/mfqP->delta;
835 	}
836 	for (j=0;j<mfqP->m;j++) {
837 	    /* C(j) = C(j) + work*G(:,j) + .5*work*H(:,:,j)*work';
838 	       G(:,j) = G(:,j) + H(:,:,j)*work' */
839 	    for (k=0;k<mfqP->n;k++) {
840 		mfqP->work2[k]=0.0;
841 		for (l=0;l<mfqP->n;l++) {
842 		    mfqP->work2[k]+=mfqP->H[j + mfqP->m*(k + l*mfqP->n)]*mfqP->work[l];
843 		}
844 	    }
845 	    for (i=0;i<mfqP->n;i++) {
846 		mfqP->C[j]+=mfqP->work[i]*(mfqP->Fdiff[i + mfqP->n* j] + 0.5*mfqP->work2[i]);
847 		mfqP->Fdiff[i+mfqP->n*j] +=mfqP-> work2[i];
848 	    }
849 	}
850 	/* Cres += work*Gres + .5*work*Hres*work';
851 	   Gres += Hres*work'; */
852 
853 	BLASgemv_("N",&blasn,&blasn,&one,mfqP->Hres,&blasn,mfqP->work,&ione,
854 		  &zero,mfqP->work2,&ione);
855 	for (i=0;j<mfqP->n;j++) {
856 	    cres += mfqP->work[i]*(mfqP->Gres[i]  + 0.5*mfqP->work2[i]);
857 	    mfqP->Gres[i] += mfqP->work2[i];
858 	}
859 	mfqP->minindex = mfqP->nHist-1;
860 	minnorm = mfqP->Fres[mfqP->minindex];
861 	/* Change current center */
862 	ierr = VecGetArray(mfqP->Xhist[mfqP->minindex],&xmint); CHKERRQ(ierr);
863 	for (i=0;i<mfqP->n;i++) {
864 	    mfqP->xmin[i] = xmint[i];
865 	}
866 	ierr = VecRestoreArray(mfqP->Xhist[mfqP->minindex],&xmint); CHKERRQ(ierr);
867 
868 
869     }
870 
871     /* Evaluate at a model-improving point if necessary */
872     if (valid == PETSC_FALSE) {
873 	mfqP->q_is_I = 1;
874 	ierr = affpoints(mfqP,mfqP->xmin,mfqP->c1); CHKERRQ(ierr);
875 	if (mfqP->nmodelpoints < mfqP->n) {
876 	  ierr = PetscInfo(tao,"Model not valid -- model-improving");
877 	  ierr = modelimprove(tao,mfqP,1); CHKERRQ(ierr);
878 	}
879     }
880 
881 
882     /* Update the trust region radius */
883     deltaold = mfqP->delta;
884     normxsp = 0;
885     for (i=0;i<mfqP->n;i++) {
886 	normxsp += mfqP->Xsubproblem[i]*mfqP->Xsubproblem[i];
887     }
888     normxsp = PetscSqrtReal(normxsp);
889     if (rho >= mfqP->eta1 && normxsp > 0.5*mfqP->delta) {
890 	mfqP->delta = PetscMin(mfqP->delta*mfqP->gamma1,mfqP->deltamax);
891     } else if (valid == PETSC_TRUE) {
892 	mfqP->delta = PetscMax(mfqP->delta*mfqP->gamma0,mfqP->deltamin);
893     }
894 
895     /* Compute the next interpolation set */
896     mfqP->q_is_I = 1;
897     mfqP->nmodelpoints=0;
898     ierr = affpoints(mfqP,mfqP->xmin,mfqP->c1); CHKERRQ(ierr);
899     if (mfqP->nmodelpoints == mfqP->n) {
900       valid = PETSC_TRUE;
901     } else {
902       valid = PETSC_FALSE;
903       ierr = affpoints(mfqP,mfqP->xmin,mfqP->c2); CHKERRQ(ierr);
904       if (mfqP->n > mfqP->nmodelpoints) {
905 	ierr = PetscInfo(tao,"Model not valid -- adding geometry points");
906 	ierr = modelimprove(tao,mfqP,mfqP->n - mfqP->nmodelpoints); CHKERRQ(ierr);
907       }
908     }
909     for (i=mfqP->nmodelpoints;i>0;i--) {
910 	mfqP->model_indices[i] = mfqP->model_indices[i-1];
911     }
912     mfqP->nmodelpoints++;
913     mfqP->model_indices[0] = mfqP->minindex;
914     ierr = morepoints(mfqP); CHKERRQ(ierr);
915     for (i=0;i<mfqP->nmodelpoints;i++) {
916 	ierr = VecGetArray(mfqP->Xhist[mfqP->model_indices[i]],&x); CHKERRQ(ierr);
917 	for (j=0;j<mfqP->n;j++) {
918 	    mfqP->Disp[i + mfqP->npmax*j] = (x[j]  - mfqP->xmin[j]) / deltaold;
919 	}
920 	ierr = VecRestoreArray(mfqP->Xhist[mfqP->model_indices[i]],&x); CHKERRQ(ierr);
921 	ierr = VecGetArray(mfqP->Fhist[mfqP->model_indices[i]],&f); CHKERRQ(ierr);
922 	for (j=0;j<mfqP->m;j++) {
923 	    for (k=0;k<mfqP->n;k++)  {
924 		mfqP->work[k]=0.0;
925 		for (l=0;l<mfqP->n;l++) {
926 		    mfqP->work[k] += mfqP->H[j + mfqP->m*(k + mfqP->n*l)] * mfqP->Disp[i + mfqP->npmax*l];
927 		}
928 	    }
929 	    mfqP->RES[j*mfqP->npmax + i] = -mfqP->C[j] - BLASdot_(&blasn,&mfqP->Fdiff[j*mfqP->n],&ione,&mfqP->Disp[i],&blasnpmax) - 0.5*BLASdot_(&blasn,mfqP->work,&ione,&mfqP->Disp[i],&blasnpmax) + f[j];
930 	}
931 	ierr = VecRestoreArray(mfqP->Fhist[mfqP->model_indices[i]],&f); CHKERRQ(ierr);
932     }
933 
934 
935     /* Update the quadratic model */
936     ierr = getquadpounders(mfqP); CHKERRQ(ierr);
937     ierr = VecGetArray(mfqP->Fhist[mfqP->minindex],&fmin); CHKERRQ(ierr);
938     BLAScopy_(&blasm,fmin,&ione,mfqP->C,&ione);
939     /* G = G*(delta/deltaold) + Gdel */
940     ratio = mfqP->delta/deltaold;
941     iblas = blasm*blasn;
942     BLASscal_(&iblas,&ratio,mfqP->Fdiff,&ione);
943     BLASaxpy_(&iblas,&one,mfqP->Gdel,&ione,mfqP->Fdiff,&ione);
944     /* H = H*(delta/deltaold) + Hdel */
945     iblas = blasm*blasn*blasn;
946     ratio *= ratio;
947     BLASscal_(&iblas,&ratio,mfqP->H,&ione);
948     BLASaxpy_(&iblas,&one,mfqP->Hdel,&ione,mfqP->H,&ione);
949 
950 
951     /* Get residuals */
952     cres = mfqP->Fres[mfqP->minindex];
953     /* Gres = G*F(xkin,1:m)' */
954     BLASgemv_("N",&blasn,&blasm,&one,mfqP->Fdiff,&blasn,mfqP->C,&ione,&zero,mfqP->Gres,&ione);
955     /* Hres = sum i=1..m {F(xkin,i)*H(:,:,i)}   + G*G' */
956     BLASgemm_("N","T",&blasn,&blasn,&blasm,&one,mfqP->Fdiff,&blasn,mfqP->Fdiff,&blasn,
957 	      &zero,mfqP->Hres,&blasn);
958 
959     iblas = mfqP->n*mfqP->n;
960 
961     for (j=0;j<mfqP->m;j++) {
962 	BLASaxpy_(&iblas,&fmin[j],&mfqP->H[j],&blasm,mfqP->Hres,&ione);
963     }
964 
965     /* Export solution and gradient residual to TAO */
966     ierr = VecCopy(mfqP->Xhist[mfqP->minindex],tao->solution); CHKERRQ(ierr);
967     ierr = VecSetValues(tao->gradient,mfqP->n,mfqP->indices,mfqP->Gres,INSERT_VALUES); CHKERRQ(ierr);
968     ierr = VecAssemblyBegin(tao->gradient);
969     ierr = VecAssemblyEnd(tao->gradient);
970     ierr = VecNorm(tao->gradient,NORM_2,&gnorm); CHKERRQ(ierr);
971     gnorm *= mfqP->delta;
972     /*  final criticality test */
973     ierr = TaoMonitor(tao, iter, minnorm, gnorm, 0.0, step, &reason); CHKERRQ(ierr);
974   }
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "TaoSetUp_POUNDERS"
980 static PetscErrorCode TaoSetUp_POUNDERS(TaoSolver tao)
981 {
982     TAO_POUNDERS *mfqP = (TAO_POUNDERS*)tao->data;
983     PetscInt i;
984     IS isfloc,isfglob,isxloc,isxglob;
985     PetscErrorCode ierr;
986 
987     PetscFunctionBegin;
988 
989     if (!tao->gradient) {ierr = VecDuplicate(tao->solution,&tao->gradient); CHKERRQ(ierr);  }
990     if (!tao->stepdirection) {ierr = VecDuplicate(tao->solution,&tao->stepdirection); CHKERRQ(ierr);  }
991     ierr = VecGetSize(tao->solution,&mfqP->n); CHKERRQ(ierr);
992     ierr = VecGetSize(tao->sep_objective,&mfqP->m); CHKERRQ(ierr);
993     mfqP->c1 = PetscSqrtReal((PetscReal)mfqP->n);
994     if (mfqP->npmax == PETSC_DEFAULT) {
995       mfqP->npmax = 2*mfqP->n + 1;
996     }
997     mfqP->npmax = PetscMin((mfqP->n+1)*(mfqP->n+2)/2,mfqP->npmax);
998     mfqP->npmax = PetscMax(mfqP->npmax, mfqP->n+2);
999 
1000     ierr = PetscMalloc((tao->max_funcs+10)*sizeof(Vec),&mfqP->Xhist); CHKERRQ(ierr);
1001     ierr = PetscMalloc((tao->max_funcs+10)*sizeof(Vec),&mfqP->Fhist); CHKERRQ(ierr);
1002     for (i=0;i<mfqP->n +1;i++) {
1003 	ierr = VecDuplicate(tao->solution,&mfqP->Xhist[i]); CHKERRQ(ierr);
1004 	ierr = VecDuplicate(tao->sep_objective,&mfqP->Fhist[i]); CHKERRQ(ierr);
1005     }
1006     ierr = VecDuplicate(tao->solution,&mfqP->workxvec); CHKERRQ(ierr);
1007     mfqP->nHist = 0;
1008 
1009     ierr = PetscMalloc((tao->max_funcs+10)*sizeof(PetscReal),&mfqP->Fres); CHKERRQ(ierr);
1010     ierr = PetscMalloc(mfqP->npmax*mfqP->m*sizeof(PetscReal),&mfqP->RES); CHKERRQ(ierr);
1011     ierr = PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->work); CHKERRQ(ierr);
1012     ierr = PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->work2); CHKERRQ(ierr);
1013     ierr = PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->work3); CHKERRQ(ierr);
1014     ierr = PetscMalloc(PetscMax(mfqP->m,mfqP->n+1)*sizeof(PetscReal),&mfqP->mwork); CHKERRQ(ierr);
1015     ierr = PetscMalloc((mfqP->npmax - mfqP->n - 1)*sizeof(PetscReal),&mfqP->omega); CHKERRQ(ierr);
1016     ierr = PetscMalloc((mfqP->n * (mfqP->n+1) / 2)*sizeof(PetscReal),&mfqP->beta); CHKERRQ(ierr);
1017     ierr = PetscMalloc((mfqP->n + 1) *sizeof(PetscReal),&mfqP->alpha); CHKERRQ(ierr);
1018 
1019     ierr = PetscMalloc(mfqP->n*mfqP->n*mfqP->m*sizeof(PetscReal),&mfqP->H); CHKERRQ(ierr);
1020     ierr = PetscMalloc(mfqP->npmax*mfqP->npmax*sizeof(PetscReal),&mfqP->Q); CHKERRQ(ierr);
1021     ierr = PetscMalloc(mfqP->npmax*mfqP->npmax*sizeof(PetscReal),&mfqP->Q_tmp); CHKERRQ(ierr);
1022     ierr = PetscMalloc(mfqP->n*(mfqP->n+1)/2*(mfqP->npmax)*sizeof(PetscReal),&mfqP->L); CHKERRQ(ierr);
1023     ierr = PetscMalloc(mfqP->n*(mfqP->n+1)/2*(mfqP->npmax)*sizeof(PetscReal),&mfqP->L_tmp); CHKERRQ(ierr);
1024     ierr = PetscMalloc(mfqP->n*(mfqP->n+1)/2*(mfqP->npmax)*sizeof(PetscReal),&mfqP->L_save); CHKERRQ(ierr);
1025     ierr = PetscMalloc(mfqP->n*(mfqP->n+1)/2*(mfqP->npmax)*sizeof(PetscReal),&mfqP->N); CHKERRQ(ierr);
1026     ierr = PetscMalloc(mfqP->npmax * (mfqP->n+1) * sizeof(PetscReal),&mfqP->M); CHKERRQ(ierr);
1027     ierr = PetscMalloc(mfqP->npmax * (mfqP->npmax - mfqP->n - 1) * sizeof(PetscReal), &mfqP->Z); CHKERRQ(ierr);
1028     ierr = PetscMalloc(mfqP->npmax*sizeof(PetscReal),&mfqP->tau); CHKERRQ(ierr);
1029     ierr = PetscMalloc(mfqP->npmax*sizeof(PetscReal),&mfqP->tau_tmp); CHKERRQ(ierr);
1030     mfqP->nmax = PetscMax(5*mfqP->npmax,mfqP->n*(mfqP->n+1)/2);
1031     ierr = PetscMalloc(mfqP->nmax*sizeof(PetscReal),&mfqP->npmaxwork); CHKERRQ(ierr);
1032     ierr = PetscMalloc(mfqP->nmax*sizeof(PetscBLASInt),&mfqP->npmaxiwork); CHKERRQ(ierr);
1033     ierr = PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->xmin); CHKERRQ(ierr);
1034     ierr = PetscMalloc(mfqP->m*sizeof(PetscReal),&mfqP->C); CHKERRQ(ierr);
1035     ierr = PetscMalloc(mfqP->m*mfqP->n*sizeof(PetscReal),&mfqP->Fdiff); CHKERRQ(ierr);
1036     ierr = PetscMalloc(mfqP->npmax*mfqP->n*sizeof(PetscReal),&mfqP->Disp); CHKERRQ(ierr);
1037     ierr = PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->Gres); CHKERRQ(ierr);
1038     ierr = PetscMalloc(mfqP->n*mfqP->n*sizeof(PetscReal),&mfqP->Hres); CHKERRQ(ierr);
1039     ierr = PetscMalloc(mfqP->n*mfqP->n*sizeof(PetscReal),&mfqP->Gpoints); CHKERRQ(ierr);
1040     ierr = PetscMalloc(mfqP->npmax*sizeof(PetscInt),&mfqP->model_indices); CHKERRQ(ierr);
1041     ierr = PetscMalloc(mfqP->n*sizeof(PetscReal),&mfqP->Xsubproblem); CHKERRQ(ierr);
1042     ierr = PetscMalloc(mfqP->m*mfqP->n*sizeof(PetscReal),&mfqP->Gdel); CHKERRQ(ierr);
1043     ierr = PetscMalloc(mfqP->n*mfqP->n*mfqP->m*sizeof(PetscReal), &mfqP->Hdel); CHKERRQ(ierr);
1044     ierr = PetscMalloc(PetscMax(mfqP->m,mfqP->n)*sizeof(PetscInt),&mfqP->indices); CHKERRQ(ierr);
1045     ierr = PetscMalloc(mfqP->n*sizeof(PetscBLASInt),&mfqP->iwork); CHKERRQ(ierr);
1046     for (i=0;i<PetscMax(mfqP->m,mfqP->n);i++) {
1047 	mfqP->indices[i] = i;
1048     }
1049   ierr = MPI_Comm_size(((PetscObject)tao)->comm,&mfqP->mpisize); CHKERRQ(ierr);
1050   if (mfqP->mpisize > 1) {
1051       VecCreateSeq(PETSC_COMM_SELF,mfqP->n,&mfqP->localx); CHKERRQ(ierr);
1052       VecCreateSeq(PETSC_COMM_SELF,mfqP->n,&mfqP->localxmin); CHKERRQ(ierr);
1053       VecCreateSeq(PETSC_COMM_SELF,mfqP->m,&mfqP->localf); CHKERRQ(ierr);
1054       VecCreateSeq(PETSC_COMM_SELF,mfqP->m,&mfqP->localfmin); CHKERRQ(ierr);
1055       ierr = ISCreateStride(MPI_COMM_SELF,mfqP->n,0,1,&isxloc); CHKERRQ(ierr);
1056       ierr = ISCreateStride(MPI_COMM_SELF,mfqP->n,0,1,&isxglob); CHKERRQ(ierr);
1057       ierr = ISCreateStride(MPI_COMM_SELF,mfqP->m,0,1,&isfloc); CHKERRQ(ierr);
1058       ierr = ISCreateStride(MPI_COMM_SELF,mfqP->m,0,1,&isfglob); CHKERRQ(ierr);
1059 
1060 
1061       ierr = VecScatterCreate(tao->solution,isxglob,mfqP->localx,isxloc,&mfqP->scatterx); CHKERRQ(ierr);
1062       ierr = VecScatterCreate(tao->sep_objective,isfglob,mfqP->localf,isfloc,&mfqP->scatterf); CHKERRQ(ierr);
1063 
1064       ierr = ISDestroy(&isxloc); CHKERRQ(ierr);
1065       ierr = ISDestroy(&isxglob); CHKERRQ(ierr);
1066       ierr = ISDestroy(&isfloc); CHKERRQ(ierr);
1067       ierr = ISDestroy(&isfglob); CHKERRQ(ierr);
1068 
1069   }
1070 
1071   if (!mfqP->usegqt) {
1072       KSP       ksp;
1073       PC        pc;
1074       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,mfqP->n,mfqP->n,mfqP->Xsubproblem,&mfqP->subx); CHKERRQ(ierr);
1075       ierr = VecCreateSeq(PETSC_COMM_SELF,mfqP->n,&mfqP->subxl); CHKERRQ(ierr);
1076       ierr = VecDuplicate(mfqP->subxl,&mfqP->subb); CHKERRQ(ierr);
1077       ierr = VecDuplicate(mfqP->subxl,&mfqP->subxu); CHKERRQ(ierr);
1078       ierr = VecDuplicate(mfqP->subxl,&mfqP->subpdel); CHKERRQ(ierr);
1079       ierr = VecDuplicate(mfqP->subxl,&mfqP->subndel); CHKERRQ(ierr);
1080       ierr = TaoCreate(PETSC_COMM_SELF,&mfqP->subtao); CHKERRQ(ierr);
1081       /* ierr = TaoSetType(mfqP->subtao,"tao_bqpip"); CHKERRQ(ierr); */
1082       ierr = TaoSetType(mfqP->subtao,"tao_tron"); CHKERRQ(ierr);
1083       ierr = TaoSetOptionsPrefix(mfqP->subtao,"pounders_subsolver_"); CHKERRQ(ierr);
1084       ierr = TaoSetInitialVector(mfqP->subtao,mfqP->subx); CHKERRQ(ierr);
1085       ierr = TaoSetObjectiveAndGradientRoutine(mfqP->subtao,pounders_fg,(void*)mfqP); CHKERRQ(ierr);
1086       ierr = TaoSetMaximumIterations(mfqP->subtao,mfqP->gqt_maxits); CHKERRQ(ierr);
1087       ierr = TaoGetKSP(mfqP->subtao,&ksp); CHKERRQ(ierr);
1088       if (ksp) {
1089 	ierr = KSPGetPC(ksp,&pc); CHKERRQ(ierr);
1090 	ierr = PCSetType(pc,PCNONE); CHKERRQ(ierr);
1091       }
1092       ierr = TaoSetVariableBounds(mfqP->subtao,mfqP->subxl,mfqP->subxu); CHKERRQ(ierr);
1093       ierr = TaoSetFromOptions(mfqP->subtao); CHKERRQ(ierr);
1094       ierr = MatCreateSeqDense(PETSC_COMM_SELF,mfqP->n,mfqP->n,mfqP->Hres,&mfqP->subH); CHKERRQ(ierr);
1095       ierr = TaoSetHessianRoutine(mfqP->subtao,mfqP->subH,mfqP->subH,pounders_h,(void*)mfqP); CHKERRQ(ierr);
1096 
1097   }
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "TaoDestroy_POUNDERS"
1103 static PetscErrorCode TaoDestroy_POUNDERS(TaoSolver tao)
1104 {
1105   TAO_POUNDERS *mfqP = (TAO_POUNDERS*)tao->data;
1106   PetscInt i;
1107   PetscErrorCode ierr;
1108 
1109 
1110   PetscFunctionBegin;
1111   if (!mfqP->usegqt) {
1112     ierr = TaoDestroy(&mfqP->subtao); CHKERRQ(ierr);
1113     ierr = VecDestroy(&mfqP->subx); CHKERRQ(ierr);
1114     ierr = VecDestroy(&mfqP->subxl); CHKERRQ(ierr);
1115     ierr = VecDestroy(&mfqP->subxu); CHKERRQ(ierr);
1116     ierr = VecDestroy(&mfqP->subb); CHKERRQ(ierr);
1117     ierr = VecDestroy(&mfqP->subpdel); CHKERRQ(ierr);
1118     ierr = VecDestroy(&mfqP->subndel); CHKERRQ(ierr);
1119     ierr = MatDestroy(&mfqP->subH); CHKERRQ(ierr);
1120   }
1121   ierr = PetscFree(mfqP->Fres); CHKERRQ(ierr);
1122   ierr = PetscFree(mfqP->RES); CHKERRQ(ierr);
1123   ierr = PetscFree(mfqP->work); CHKERRQ(ierr);
1124   ierr = PetscFree(mfqP->work2); CHKERRQ(ierr);
1125   ierr = PetscFree(mfqP->work3); CHKERRQ(ierr);
1126   ierr = PetscFree(mfqP->mwork); CHKERRQ(ierr);
1127   ierr = PetscFree(mfqP->omega); CHKERRQ(ierr);
1128   ierr = PetscFree(mfqP->beta); CHKERRQ(ierr);
1129   ierr = PetscFree(mfqP->alpha); CHKERRQ(ierr);
1130   ierr = PetscFree(mfqP->H); CHKERRQ(ierr);
1131   ierr = PetscFree(mfqP->Q); CHKERRQ(ierr);
1132   ierr = PetscFree(mfqP->Q_tmp); CHKERRQ(ierr);
1133   ierr = PetscFree(mfqP->L); CHKERRQ(ierr);
1134   ierr = PetscFree(mfqP->L_tmp); CHKERRQ(ierr);
1135   ierr = PetscFree(mfqP->L_save); CHKERRQ(ierr);
1136   ierr = PetscFree(mfqP->N); CHKERRQ(ierr);
1137   ierr = PetscFree(mfqP->M); CHKERRQ(ierr);
1138   ierr = PetscFree(mfqP->Z); CHKERRQ(ierr);
1139   ierr = PetscFree(mfqP->tau); CHKERRQ(ierr);
1140   ierr = PetscFree(mfqP->tau_tmp); CHKERRQ(ierr);
1141   ierr = PetscFree(mfqP->npmaxwork); CHKERRQ(ierr);
1142   ierr = PetscFree(mfqP->npmaxiwork); CHKERRQ(ierr);
1143   ierr = PetscFree(mfqP->xmin); CHKERRQ(ierr);
1144   ierr = PetscFree(mfqP->C); CHKERRQ(ierr);
1145   ierr = PetscFree(mfqP->Fdiff); CHKERRQ(ierr);
1146   ierr = PetscFree(mfqP->Disp); CHKERRQ(ierr);
1147   ierr = PetscFree(mfqP->Gres); CHKERRQ(ierr);
1148   ierr = PetscFree(mfqP->Hres); CHKERRQ(ierr);
1149   ierr = PetscFree(mfqP->Gpoints); CHKERRQ(ierr);
1150   ierr = PetscFree(mfqP->model_indices); CHKERRQ(ierr);
1151   ierr = PetscFree(mfqP->Xsubproblem); CHKERRQ(ierr);
1152   ierr = PetscFree(mfqP->Gdel); CHKERRQ(ierr);
1153   ierr = PetscFree(mfqP->Hdel); CHKERRQ(ierr);
1154   ierr = PetscFree(mfqP->indices); CHKERRQ(ierr);
1155   ierr = PetscFree(mfqP->iwork); CHKERRQ(ierr);
1156 
1157   for (i=0;i<mfqP->nHist;i++) {
1158       ierr = VecDestroy(&mfqP->Xhist[i]); CHKERRQ(ierr);
1159       ierr = VecDestroy(&mfqP->Fhist[i]); CHKERRQ(ierr);
1160   }
1161   if (mfqP->workxvec) {
1162     ierr = VecDestroy(&mfqP->workxvec); CHKERRQ(ierr);
1163   }
1164   ierr = PetscFree(mfqP->Xhist); CHKERRQ(ierr);
1165   ierr = PetscFree(mfqP->Fhist); CHKERRQ(ierr);
1166 
1167   if (mfqP->mpisize > 1) {
1168       ierr = VecDestroy(&mfqP->localx);  CHKERRQ(ierr);
1169       ierr = VecDestroy(&mfqP->localxmin);  CHKERRQ(ierr);
1170       ierr = VecDestroy(&mfqP->localf);  CHKERRQ(ierr);
1171       ierr = VecDestroy(&mfqP->localfmin);  CHKERRQ(ierr);
1172   }
1173 
1174 
1175 
1176   if (tao->data) {
1177     ierr = PetscFree(tao->data); CHKERRQ(ierr);
1178   }
1179   tao->data = PETSC_NULL;
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "TaoSetFromOptions_POUNDERS"
1185 static PetscErrorCode TaoSetFromOptions_POUNDERS(TaoSolver tao)
1186 {
1187   TAO_POUNDERS *mfqP = (TAO_POUNDERS*)tao->data;
1188   PetscErrorCode ierr;
1189 
1190   PetscFunctionBegin;
1191   ierr = PetscOptionsHead("POUNDERS method for least-squares optimization"); CHKERRQ(ierr);
1192   ierr = PetscOptionsReal("-tao_pounders_delta","initial delta","",mfqP->delta,&mfqP->delta,0); CHKERRQ(ierr);
1193   mfqP->npmax = PETSC_DEFAULT;
1194   ierr = PetscOptionsInt("-tao_pounders_npmax","max number of points in model","",mfqP->npmax,&mfqP->npmax,0); CHKERRQ(ierr);
1195   mfqP->usegqt = PETSC_FALSE;
1196   ierr = PetscOptionsBool("-tao_pounders_gqt","use gqt algorithm for subproblem","",mfqP->usegqt,&mfqP->usegqt,0); CHKERRQ(ierr);
1197   ierr = PetscOptionsTail(); CHKERRQ(ierr);
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "TaoView_POUNDERS"
1203 static PetscErrorCode TaoView_POUNDERS(TaoSolver tao, PetscViewer viewer)
1204 {
1205   TAO_POUNDERS *mfqP = (TAO_POUNDERS *)tao->data;
1206   PetscBool isascii;
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210 
1211   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
1212   if (isascii) {
1213     ierr = PetscViewerASCIIPushTab(viewer); CHKERRQ(ierr);
1214     if (mfqP->usegqt) {
1215       ierr = PetscViewerASCIIPrintf(viewer, "Subproblem solver: gqt\n");
1216     } else {
1217       ierr = PetscViewerASCIIPrintf(viewer, "Subproblem solver: tron\n");
1218     }
1219     ierr = PetscViewerASCIIPopTab(viewer); CHKERRQ(ierr);
1220   } else {
1221     SETERRQ1(((PetscObject)tao)->comm,PETSC_ERR_SUP,"Viewer type %s not supported for TAO NLS",((PetscObject)viewer)->type_name);
1222   }
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 EXTERN_C_BEGIN
1227 #undef __FUNCT__
1228 #define __FUNCT__ "TaoCreate_POUNDERS"
1229 PetscErrorCode TaoCreate_POUNDERS(TaoSolver tao)
1230 {
1231   TAO_POUNDERS *mfqP = (TAO_POUNDERS*)tao->data;
1232   PetscErrorCode ierr;
1233 
1234   PetscFunctionBegin;
1235 
1236   tao->ops->setup = TaoSetUp_POUNDERS;
1237   tao->ops->solve = TaoSolve_POUNDERS;
1238   tao->ops->view = TaoView_POUNDERS;
1239   tao->ops->setfromoptions = TaoSetFromOptions_POUNDERS;
1240   tao->ops->destroy = TaoDestroy_POUNDERS;
1241 
1242 
1243   ierr = PetscNewLog(tao,&mfqP); CHKERRQ(ierr);
1244   tao->data = (void*)mfqP;
1245   tao->max_it = 2000;
1246   tao->max_funcs = 4000;
1247   tao->fatol = 1e-8;
1248   tao->frtol = 1e-8;
1249   mfqP->delta = 0.1;
1250   mfqP->deltamax=1e3;
1251   mfqP->deltamin=1e-6;
1252   mfqP->c2 = 100.0;
1253   mfqP->theta1=1e-5;
1254   mfqP->theta2=1e-4;
1255   mfqP->gamma0=0.5;
1256   mfqP->gamma1=2.0;
1257   mfqP->eta0 = 0.0;
1258   mfqP->eta1 = 0.1;
1259   mfqP->gqt_rtol = 0.001;
1260   mfqP->gqt_maxits = 50;
1261   mfqP->workxvec = 0;
1262   PetscFunctionReturn(0);
1263 }
1264 EXTERN_C_END
1265 
1266