xref: /petsc/src/tao/leastsquares/impls/pounders/gqt.c (revision a7e14dcfba0d07adf6226a919460249440ec94c7)
1*a7e14dcfSSatish Balay #include "petsc.h"
2*a7e14dcfSSatish Balay #include "petscblaslapack.h"
3*a7e14dcfSSatish Balay #include "taolapack.h"
4*a7e14dcfSSatish Balay 
5*a7e14dcfSSatish Balay 
6*a7e14dcfSSatish Balay 
7*a7e14dcfSSatish Balay #undef __FUNCT__
8*a7e14dcfSSatish Balay #define __FUNCT__ "estsv"
9*a7e14dcfSSatish Balay static PetscErrorCode estsv(PetscInt n, PetscReal *r, PetscInt ldr, PetscReal *svmin, PetscReal *z) {
10*a7e14dcfSSatish Balay 
11*a7e14dcfSSatish Balay     PetscBLASInt blas1=1, blasn=n, blasnmi, blasj, blasldr = ldr;
12*a7e14dcfSSatish Balay     PetscInt i,j;
13*a7e14dcfSSatish Balay     PetscReal e,temp,w,wm,ynorm,znorm,s,sm;
14*a7e14dcfSSatish Balay     PetscFunctionBegin;
15*a7e14dcfSSatish Balay     for (i=0;i<n;i++) {
16*a7e14dcfSSatish Balay 	z[i]=0.0;
17*a7e14dcfSSatish Balay     }
18*a7e14dcfSSatish Balay 
19*a7e14dcfSSatish Balay     e = PetscAbs(r[0]);
20*a7e14dcfSSatish Balay     if (e == 0.0) {
21*a7e14dcfSSatish Balay 	*svmin = 0.0;
22*a7e14dcfSSatish Balay 	z[0] = 1.0;
23*a7e14dcfSSatish Balay     } else {
24*a7e14dcfSSatish Balay       /* Solve R'*y = e */
25*a7e14dcfSSatish Balay 	for (i=0;i<n;i++) {
26*a7e14dcfSSatish Balay 
27*a7e14dcfSSatish Balay 	  /* Scale y. The scaling factor (0.01) reduces the number of scalings */
28*a7e14dcfSSatish Balay 	    if (z[i] >= 0.0)
29*a7e14dcfSSatish Balay 		e=-PetscAbs(e);
30*a7e14dcfSSatish Balay 	    else
31*a7e14dcfSSatish Balay 		e = PetscAbs(e);
32*a7e14dcfSSatish Balay 
33*a7e14dcfSSatish Balay 	    if (PetscAbs(e - z[i]) > PetscAbs(r[i + ldr*i])) {
34*a7e14dcfSSatish Balay 		temp = PetscMin(0.01,PetscAbs(r[i + ldr*i]))/
35*a7e14dcfSSatish Balay 		    PetscAbs(e-z[i]);
36*a7e14dcfSSatish Balay 		BLASscal_(&blasn, &temp, z, &blas1);
37*a7e14dcfSSatish Balay 		e = temp*e;
38*a7e14dcfSSatish Balay 	    }
39*a7e14dcfSSatish Balay 
40*a7e14dcfSSatish Balay 	    /* Determine the two possible choices of y[i] */
41*a7e14dcfSSatish Balay 	    if (r[i + ldr*i] == 0.0)
42*a7e14dcfSSatish Balay 		w = wm = 1.0;
43*a7e14dcfSSatish Balay 	    else {
44*a7e14dcfSSatish Balay 		w = (e - z[i]) / r[i + ldr*i];
45*a7e14dcfSSatish Balay 		wm = - (e + z[i]) / r[i + ldr*i];
46*a7e14dcfSSatish Balay 	    }
47*a7e14dcfSSatish Balay 
48*a7e14dcfSSatish Balay 	    /*  Chose y[i] based on the predicted value of y[j] for j>i */
49*a7e14dcfSSatish Balay 	    s = PetscAbs(e - z[i]);
50*a7e14dcfSSatish Balay 	    sm = PetscAbs(e + z[i]);
51*a7e14dcfSSatish Balay 	    for (j=i+1;j<n;j++) {
52*a7e14dcfSSatish Balay 		sm += PetscAbs(z[j] + wm * r[i + ldr*j]);
53*a7e14dcfSSatish Balay 	    }
54*a7e14dcfSSatish Balay 	    if (i < n-1) {
55*a7e14dcfSSatish Balay 		blasnmi = n-i-1;
56*a7e14dcfSSatish Balay 		BLASaxpy_(&blasnmi, &w, &r[i + ldr*(i+1)], &blasldr, &z[i+1], &blas1);
57*a7e14dcfSSatish Balay 		s += BLASasum_(&blasnmi, &z[i+1], &blas1);
58*a7e14dcfSSatish Balay 	    }
59*a7e14dcfSSatish Balay 	    if (s < sm) {
60*a7e14dcfSSatish Balay 		temp = wm - w;
61*a7e14dcfSSatish Balay 		w = wm;
62*a7e14dcfSSatish Balay 		if (i < n-1) {
63*a7e14dcfSSatish Balay 		    BLASaxpy_(&blasnmi, &temp, &r[i + ldr*(i+1)], &blasldr, &z[i+1], &blas1);
64*a7e14dcfSSatish Balay 		}
65*a7e14dcfSSatish Balay 	    }
66*a7e14dcfSSatish Balay 	    z[i] = w;
67*a7e14dcfSSatish Balay 	}
68*a7e14dcfSSatish Balay 
69*a7e14dcfSSatish Balay 	ynorm = BLASnrm2_(&blasn, z, &blas1);
70*a7e14dcfSSatish Balay 
71*a7e14dcfSSatish Balay 	/* Solve R*z = y */
72*a7e14dcfSSatish Balay 	for (j=n-1; j>=0; j--) {
73*a7e14dcfSSatish Balay 
74*a7e14dcfSSatish Balay 	  /* Scale z */
75*a7e14dcfSSatish Balay 
76*a7e14dcfSSatish Balay 	    if (PetscAbs(z[j]) > PetscAbs(r[j + ldr*j])) {
77*a7e14dcfSSatish Balay 		temp = PetscMin(0.01, PetscAbs(r[j + ldr*j] / z[j]));
78*a7e14dcfSSatish Balay 		BLASscal_(&blasn, &temp, z, &blas1);
79*a7e14dcfSSatish Balay 		ynorm *=temp;
80*a7e14dcfSSatish Balay 	    }
81*a7e14dcfSSatish Balay 	    if (r[j + ldr*j] == 0) {
82*a7e14dcfSSatish Balay 		z[j] = 1.0;
83*a7e14dcfSSatish Balay 	    } else {
84*a7e14dcfSSatish Balay 		z[j] = z[j] / r[j + ldr*j];
85*a7e14dcfSSatish Balay 	    }
86*a7e14dcfSSatish Balay 	    temp = -z[j];
87*a7e14dcfSSatish Balay 	    blasj=j;
88*a7e14dcfSSatish Balay 	    BLASaxpy_(&blasj,&temp,&r[0+ldr*j],&blas1,z,&blas1);
89*a7e14dcfSSatish Balay 	}
90*a7e14dcfSSatish Balay 
91*a7e14dcfSSatish Balay 	/* Compute svmin and normalize z */
92*a7e14dcfSSatish Balay 	znorm = 1.0 / BLASnrm2_(&blasn, z, &blas1);
93*a7e14dcfSSatish Balay 	*svmin = ynorm*znorm;
94*a7e14dcfSSatish Balay 	BLASscal_(&blasn, &znorm, z, &blas1);
95*a7e14dcfSSatish Balay 
96*a7e14dcfSSatish Balay     }
97*a7e14dcfSSatish Balay 
98*a7e14dcfSSatish Balay     PetscFunctionReturn(0);
99*a7e14dcfSSatish Balay }
100*a7e14dcfSSatish Balay 
101*a7e14dcfSSatish Balay 
102*a7e14dcfSSatish Balay 
103*a7e14dcfSSatish Balay /*
104*a7e14dcfSSatish Balay c     ***********
105*a7e14dcfSSatish Balay c
106*a7e14dcfSSatish Balay c     Subroutine dgqt
107*a7e14dcfSSatish Balay c
108*a7e14dcfSSatish Balay c     Given an n by n symmetric matrix A, an n-vector b, and a
109*a7e14dcfSSatish Balay c     positive number delta, this subroutine determines a vector
110*a7e14dcfSSatish Balay c     x which approximately minimizes the quadratic function
111*a7e14dcfSSatish Balay c
112*a7e14dcfSSatish Balay c           f(x) = (1/2)*x'*A*x + b'*x
113*a7e14dcfSSatish Balay c
114*a7e14dcfSSatish Balay c     subject to the Euclidean norm constraint
115*a7e14dcfSSatish Balay c
116*a7e14dcfSSatish Balay c           norm(x) <= delta.
117*a7e14dcfSSatish Balay c
118*a7e14dcfSSatish Balay c     This subroutine computes an approximation x and a Lagrange
119*a7e14dcfSSatish Balay c     multiplier par such that either par is zero and
120*a7e14dcfSSatish Balay c
121*a7e14dcfSSatish Balay c            norm(x) <= (1+rtol)*delta,
122*a7e14dcfSSatish Balay c
123*a7e14dcfSSatish Balay c     or par is positive and
124*a7e14dcfSSatish Balay c
125*a7e14dcfSSatish Balay c            abs(norm(x) - delta) <= rtol*delta.
126*a7e14dcfSSatish Balay c
127*a7e14dcfSSatish Balay c     If xsol is the solution to the problem, the approximation x
128*a7e14dcfSSatish Balay c     satisfies
129*a7e14dcfSSatish Balay c
130*a7e14dcfSSatish Balay c            f(x) <= ((1 - rtol)**2)*f(xsol)
131*a7e14dcfSSatish Balay c
132*a7e14dcfSSatish Balay c     The subroutine statement is
133*a7e14dcfSSatish Balay c
134*a7e14dcfSSatish Balay c       subroutine dgqt(n,a,lda,b,delta,rtol,atol,itmax,
135*a7e14dcfSSatish Balay c                        par,f,x,info,z,wa1,wa2)
136*a7e14dcfSSatish Balay c
137*a7e14dcfSSatish Balay c     where
138*a7e14dcfSSatish Balay c
139*a7e14dcfSSatish Balay c       n is an integer variable.
140*a7e14dcfSSatish Balay c         On entry n is the order of A.
141*a7e14dcfSSatish Balay c         On exit n is unchanged.
142*a7e14dcfSSatish Balay c
143*a7e14dcfSSatish Balay c       a is a double precision array of dimension (lda,n).
144*a7e14dcfSSatish Balay c         On entry the full upper triangle of a must contain the
145*a7e14dcfSSatish Balay c            full upper triangle of the symmetric matrix A.
146*a7e14dcfSSatish Balay c         On exit the array contains the matrix A.
147*a7e14dcfSSatish Balay c
148*a7e14dcfSSatish Balay c       lda is an integer variable.
149*a7e14dcfSSatish Balay c         On entry lda is the leading dimension of the array a.
150*a7e14dcfSSatish Balay c         On exit lda is unchanged.
151*a7e14dcfSSatish Balay c
152*a7e14dcfSSatish Balay c       b is an double precision array of dimension n.
153*a7e14dcfSSatish Balay c         On entry b specifies the linear term in the quadratic.
154*a7e14dcfSSatish Balay c         On exit b is unchanged.
155*a7e14dcfSSatish Balay c
156*a7e14dcfSSatish Balay c       delta is a double precision variable.
157*a7e14dcfSSatish Balay c         On entry delta is a bound on the Euclidean norm of x.
158*a7e14dcfSSatish Balay c         On exit delta is unchanged.
159*a7e14dcfSSatish Balay c
160*a7e14dcfSSatish Balay c       rtol is a double precision variable.
161*a7e14dcfSSatish Balay c         On entry rtol is the relative accuracy desired in the
162*a7e14dcfSSatish Balay c            solution. Convergence occurs if
163*a7e14dcfSSatish Balay c
164*a7e14dcfSSatish Balay c              f(x) <= ((1 - rtol)**2)*f(xsol)
165*a7e14dcfSSatish Balay c
166*a7e14dcfSSatish Balay c         On exit rtol is unchanged.
167*a7e14dcfSSatish Balay c
168*a7e14dcfSSatish Balay c       atol is a double precision variable.
169*a7e14dcfSSatish Balay c         On entry atol is the absolute accuracy desired in the
170*a7e14dcfSSatish Balay c            solution. Convergence occurs when
171*a7e14dcfSSatish Balay c
172*a7e14dcfSSatish Balay c              norm(x) <= (1 + rtol)*delta
173*a7e14dcfSSatish Balay c
174*a7e14dcfSSatish Balay c              max(-f(x),-f(xsol)) <= atol
175*a7e14dcfSSatish Balay c
176*a7e14dcfSSatish Balay c         On exit atol is unchanged.
177*a7e14dcfSSatish Balay c
178*a7e14dcfSSatish Balay c       itmax is an integer variable.
179*a7e14dcfSSatish Balay c         On entry itmax specifies the maximum number of iterations.
180*a7e14dcfSSatish Balay c         On exit itmax is unchanged.
181*a7e14dcfSSatish Balay c
182*a7e14dcfSSatish Balay c       par is a double precision variable.
183*a7e14dcfSSatish Balay c         On entry par is an initial estimate of the Lagrange
184*a7e14dcfSSatish Balay c            multiplier for the constraint norm(x) <= delta.
185*a7e14dcfSSatish Balay c         On exit par contains the final estimate of the multiplier.
186*a7e14dcfSSatish Balay c
187*a7e14dcfSSatish Balay c       f is a double precision variable.
188*a7e14dcfSSatish Balay c         On entry f need not be specified.
189*a7e14dcfSSatish Balay c         On exit f is set to f(x) at the output x.
190*a7e14dcfSSatish Balay c
191*a7e14dcfSSatish Balay c       x is a double precision array of dimension n.
192*a7e14dcfSSatish Balay c         On entry x need not be specified.
193*a7e14dcfSSatish Balay c         On exit x is set to the final estimate of the solution.
194*a7e14dcfSSatish Balay c
195*a7e14dcfSSatish Balay c       info is an integer variable.
196*a7e14dcfSSatish Balay c         On entry info need not be specified.
197*a7e14dcfSSatish Balay c         On exit info is set as follows:
198*a7e14dcfSSatish Balay c
199*a7e14dcfSSatish Balay c            info = 1  The function value f(x) has the relative
200*a7e14dcfSSatish Balay c                      accuracy specified by rtol.
201*a7e14dcfSSatish Balay c
202*a7e14dcfSSatish Balay c            info = 2  The function value f(x) has the absolute
203*a7e14dcfSSatish Balay c                      accuracy specified by atol.
204*a7e14dcfSSatish Balay c
205*a7e14dcfSSatish Balay c            info = 3  Rounding errors prevent further progress.
206*a7e14dcfSSatish Balay c                      On exit x is the best available approximation.
207*a7e14dcfSSatish Balay c
208*a7e14dcfSSatish Balay c            info = 4  Failure to converge after itmax iterations.
209*a7e14dcfSSatish Balay c                      On exit x is the best available approximation.
210*a7e14dcfSSatish Balay c
211*a7e14dcfSSatish Balay c       z is a double precision work array of dimension n.
212*a7e14dcfSSatish Balay c
213*a7e14dcfSSatish Balay c       wa1 is a double precision work array of dimension n.
214*a7e14dcfSSatish Balay c
215*a7e14dcfSSatish Balay c       wa2 is a double precision work array of dimension n.
216*a7e14dcfSSatish Balay c
217*a7e14dcfSSatish Balay c     Subprograms called
218*a7e14dcfSSatish Balay c
219*a7e14dcfSSatish Balay c       MINPACK-2  ......  destsv
220*a7e14dcfSSatish Balay c
221*a7e14dcfSSatish Balay c       LAPACK  .........  dpotrf
222*a7e14dcfSSatish Balay c
223*a7e14dcfSSatish Balay c       Level 1 BLAS  ...  daxpy, dcopy, ddot, dnrm2, dscal
224*a7e14dcfSSatish Balay c
225*a7e14dcfSSatish Balay c       Level 2 BLAS  ...  dtrmv, dtrsv
226*a7e14dcfSSatish Balay c
227*a7e14dcfSSatish Balay c     MINPACK-2 Project. October 1993.
228*a7e14dcfSSatish Balay c     Argonne National Laboratory and University of Minnesota.
229*a7e14dcfSSatish Balay c     Brett M. Averick, Richard Carter, and Jorge J. More'
230*a7e14dcfSSatish Balay c
231*a7e14dcfSSatish Balay c     ***********
232*a7e14dcfSSatish Balay */
233*a7e14dcfSSatish Balay 
234*a7e14dcfSSatish Balay #undef __FUNCT__
235*a7e14dcfSSatish Balay #define __FUNCT__ "gqt"
236*a7e14dcfSSatish Balay PetscErrorCode gqt(PetscInt n, PetscReal *a, PetscInt lda, PetscReal *b,
237*a7e14dcfSSatish Balay 		   PetscReal delta, PetscReal rtol, PetscReal atol,
238*a7e14dcfSSatish Balay 		   PetscInt itmax, PetscReal *retpar, PetscReal *retf,
239*a7e14dcfSSatish Balay 		   PetscReal *x, PetscInt *retinfo, PetscInt *retits,
240*a7e14dcfSSatish Balay 		   PetscReal *z, PetscReal *wa1, PetscReal *wa2)
241*a7e14dcfSSatish Balay {
242*a7e14dcfSSatish Balay     PetscErrorCode ierr;
243*a7e14dcfSSatish Balay     PetscReal f=0.0,p001=0.001,p5=0.5,minusone=-1,delta2=delta*delta;
244*a7e14dcfSSatish Balay     PetscInt iter, j, rednc,info;
245*a7e14dcfSSatish Balay     PetscBLASInt indef;
246*a7e14dcfSSatish Balay     PetscBLASInt blas1=1, blasn=n, iblas, blaslda = lda,blasldap1=lda+1,blasinfo;
247*a7e14dcfSSatish Balay     PetscReal alpha, anorm, bnorm, parc, parf, parl, pars, par=*retpar,
248*a7e14dcfSSatish Balay 	paru, prod, rxnorm, rznorm=0.0, temp, xnorm;
249*a7e14dcfSSatish Balay 
250*a7e14dcfSSatish Balay     PetscFunctionBegin;
251*a7e14dcfSSatish Balay     iter = 0;
252*a7e14dcfSSatish Balay     parf = 0.0;
253*a7e14dcfSSatish Balay     xnorm = 0.0;
254*a7e14dcfSSatish Balay     rxnorm = 0.0;
255*a7e14dcfSSatish Balay     rednc = 0;
256*a7e14dcfSSatish Balay     for (j=0; j<n; j++) {
257*a7e14dcfSSatish Balay 	x[j] = 0.0;
258*a7e14dcfSSatish Balay 	z[j] = 0.0;
259*a7e14dcfSSatish Balay     }
260*a7e14dcfSSatish Balay 
261*a7e14dcfSSatish Balay     /* Copy the diagonal and save A in its lower triangle */
262*a7e14dcfSSatish Balay     BLAScopy_(&blasn,a,&blasldap1, wa1, &blas1);
263*a7e14dcfSSatish Balay     CHKMEMQ;
264*a7e14dcfSSatish Balay     for (j=0;j<n-1;j++) {
265*a7e14dcfSSatish Balay 	iblas = n - j - 1;
266*a7e14dcfSSatish Balay 	BLAScopy_(&iblas,&a[j + lda*(j+1)], &blaslda, &a[j+1 + lda*j], &blas1);
267*a7e14dcfSSatish Balay 	CHKMEMQ;
268*a7e14dcfSSatish Balay     }
269*a7e14dcfSSatish Balay 
270*a7e14dcfSSatish Balay     /* Calculate the l1-norm of A, the Gershgorin row sums, and the
271*a7e14dcfSSatish Balay        l2-norm of b */
272*a7e14dcfSSatish Balay     anorm = 0.0;
273*a7e14dcfSSatish Balay     for (j=0;j<n;j++) {
274*a7e14dcfSSatish Balay 	wa2[j] = BLASasum_(&blasn, &a[0 + lda*j], &blas1);
275*a7e14dcfSSatish Balay 	CHKMEMQ;
276*a7e14dcfSSatish Balay 	anorm = PetscMax(anorm,wa2[j]);
277*a7e14dcfSSatish Balay     }
278*a7e14dcfSSatish Balay     for (j=0;j<n;j++) {
279*a7e14dcfSSatish Balay 	wa2[j] = wa2[j] - PetscAbs(wa1[j]);
280*a7e14dcfSSatish Balay     }
281*a7e14dcfSSatish Balay     bnorm = BLASnrm2_(&blasn,b,&blas1);
282*a7e14dcfSSatish Balay     CHKMEMQ;
283*a7e14dcfSSatish Balay     /* Calculate a lower bound, pars, for the domain of the problem.
284*a7e14dcfSSatish Balay        Also calculate an upper bound, paru, and a lower bound, parl,
285*a7e14dcfSSatish Balay        for the Lagrange multiplier. */
286*a7e14dcfSSatish Balay     pars = parl = paru = -anorm;
287*a7e14dcfSSatish Balay     for (j=0;j<n;j++) {
288*a7e14dcfSSatish Balay 	pars = PetscMax(pars, -wa1[j]);
289*a7e14dcfSSatish Balay 	parl = PetscMax(parl, wa1[j] + wa2[j]);
290*a7e14dcfSSatish Balay 	paru = PetscMax(paru, -wa1[j] + wa2[j]);
291*a7e14dcfSSatish Balay     }
292*a7e14dcfSSatish Balay     parl = PetscMax(bnorm/delta - parl,pars);
293*a7e14dcfSSatish Balay     parl = PetscMax(0.0,parl);
294*a7e14dcfSSatish Balay     paru = PetscMax(0.0, bnorm/delta + paru);
295*a7e14dcfSSatish Balay 
296*a7e14dcfSSatish Balay     /* If the input par lies outside of the interval (parl, paru),
297*a7e14dcfSSatish Balay        set par to the closer endpoint. */
298*a7e14dcfSSatish Balay 
299*a7e14dcfSSatish Balay     par = PetscMax(par,parl);
300*a7e14dcfSSatish Balay     par = PetscMin(par,paru);
301*a7e14dcfSSatish Balay 
302*a7e14dcfSSatish Balay     /* Special case: parl == paru */
303*a7e14dcfSSatish Balay     paru = PetscMax(paru, (1.0 + rtol)*parl);
304*a7e14dcfSSatish Balay 
305*a7e14dcfSSatish Balay     /* Beginning of an iteration */
306*a7e14dcfSSatish Balay 
307*a7e14dcfSSatish Balay     info = 0;
308*a7e14dcfSSatish Balay     for (iter=1;iter<=itmax;iter++) {
309*a7e14dcfSSatish Balay 
310*a7e14dcfSSatish Balay       /* Safeguard par */
311*a7e14dcfSSatish Balay 	if (par <= pars && paru > 0) {
312*a7e14dcfSSatish Balay 	    par = PetscMax(p001, PetscSqrtScalar(parl/paru)) * paru;
313*a7e14dcfSSatish Balay 	}
314*a7e14dcfSSatish Balay 
315*a7e14dcfSSatish Balay 	/* Copy the lower triangle of A into its upper triangle and
316*a7e14dcfSSatish Balay 	   compute A + par*I */
317*a7e14dcfSSatish Balay 
318*a7e14dcfSSatish Balay 	for (j=0;j<n-1;j++) {
319*a7e14dcfSSatish Balay 	    iblas = n - j - 1;
320*a7e14dcfSSatish Balay 	    BLAScopy_(&iblas,&a[j+1 + j*lda], &blas1,
321*a7e14dcfSSatish Balay 		      &a[j + (j+1)*lda], &blaslda);
322*a7e14dcfSSatish Balay 	    CHKMEMQ;
323*a7e14dcfSSatish Balay 	}
324*a7e14dcfSSatish Balay 	for (j=0;j<n;j++) {
325*a7e14dcfSSatish Balay 	    a[j + j*lda] = wa1[j] + par;
326*a7e14dcfSSatish Balay 	}
327*a7e14dcfSSatish Balay 
328*a7e14dcfSSatish Balay 	/* Attempt the Cholesky factorization of A without referencing
329*a7e14dcfSSatish Balay 	   the lower triangular part. */
330*a7e14dcfSSatish Balay 	LAPACKpotrf_("U",&blasn,a,&blaslda,&indef);
331*a7e14dcfSSatish Balay 	CHKMEMQ;
332*a7e14dcfSSatish Balay 
333*a7e14dcfSSatish Balay 	/* Case 1: A + par*I is pos. def. */
334*a7e14dcfSSatish Balay 	if (indef == 0) {
335*a7e14dcfSSatish Balay 
336*a7e14dcfSSatish Balay   	  /* Compute an approximate solution x and save the
337*a7e14dcfSSatish Balay 	     last value of par with A + par*I pos. def. */
338*a7e14dcfSSatish Balay 
339*a7e14dcfSSatish Balay 	    parf = par;
340*a7e14dcfSSatish Balay 	    BLAScopy_(&blasn, b, &blas1, wa2, &blas1);
341*a7e14dcfSSatish Balay 	    CHKMEMQ;
342*a7e14dcfSSatish Balay 	    LAPACKtrtrs_("U","T","N",&blasn,&blas1,a,&blaslda,wa2,&blasn,&blasinfo);
343*a7e14dcfSSatish Balay 	    CHKMEMQ;
344*a7e14dcfSSatish Balay 	    rxnorm = BLASnrm2_(&blasn, wa2, &blas1);
345*a7e14dcfSSatish Balay 	    LAPACKtrtrs_("U","N","N",&blasn,&blas1,a,&blaslda,wa2,&blasn,&blasinfo);
346*a7e14dcfSSatish Balay 	    CHKMEMQ;
347*a7e14dcfSSatish Balay 	    BLAScopy_(&blasn, wa2, &blas1, x, &blas1);
348*a7e14dcfSSatish Balay 	    CHKMEMQ;
349*a7e14dcfSSatish Balay 	    BLASscal_(&blasn, &minusone, x, &blas1);
350*a7e14dcfSSatish Balay 	    CHKMEMQ;
351*a7e14dcfSSatish Balay 	    xnorm = BLASnrm2_(&blasn, x, &blas1);
352*a7e14dcfSSatish Balay 	    CHKMEMQ;
353*a7e14dcfSSatish Balay 
354*a7e14dcfSSatish Balay 	    /* Test for convergence */
355*a7e14dcfSSatish Balay 	    if (PetscAbs(xnorm - delta) <= rtol*delta ||
356*a7e14dcfSSatish Balay 		(par == 0  && xnorm <= (1.0+rtol)*delta)) {
357*a7e14dcfSSatish Balay 		info = 1;
358*a7e14dcfSSatish Balay 	    }
359*a7e14dcfSSatish Balay 
360*a7e14dcfSSatish Balay 	    /* Compute a direction of negative curvature and use this
361*a7e14dcfSSatish Balay 	       information to improve pars. */
362*a7e14dcfSSatish Balay 
363*a7e14dcfSSatish Balay 	    iblas=blasn*blasn;
364*a7e14dcfSSatish Balay 
365*a7e14dcfSSatish Balay 	    ierr = estsv(n,a,lda,&rznorm,z); CHKERRQ(ierr);
366*a7e14dcfSSatish Balay 	    CHKMEMQ;
367*a7e14dcfSSatish Balay 	    pars = PetscMax(pars, par-rznorm*rznorm);
368*a7e14dcfSSatish Balay 
369*a7e14dcfSSatish Balay 	    /* Compute a negative curvature solution of the form
370*a7e14dcfSSatish Balay 	       x + alpha*z,  where norm(x+alpha*z)==delta */
371*a7e14dcfSSatish Balay 
372*a7e14dcfSSatish Balay 	    rednc = 0;
373*a7e14dcfSSatish Balay 	    if (xnorm < delta) {
374*a7e14dcfSSatish Balay 
375*a7e14dcfSSatish Balay 	        /* Compute alpha */
376*a7e14dcfSSatish Balay 		prod = BLASdot_(&blasn, z, &blas1, x, &blas1) / delta;
377*a7e14dcfSSatish Balay 		temp = (delta - xnorm)*((delta + xnorm)/delta);
378*a7e14dcfSSatish Balay 		alpha = temp/(PetscAbs(prod) + PetscSqrtScalar(prod*prod + temp/delta));
379*a7e14dcfSSatish Balay 		if (prod >= 0)
380*a7e14dcfSSatish Balay 		    alpha = PetscAbs(alpha);
381*a7e14dcfSSatish Balay 		else
382*a7e14dcfSSatish Balay 		    alpha =-PetscAbs(alpha);
383*a7e14dcfSSatish Balay 
384*a7e14dcfSSatish Balay 
385*a7e14dcfSSatish Balay 		/* Test to decide if the negative curvature step
386*a7e14dcfSSatish Balay 		   produces a larger reduction than with z=0 */
387*a7e14dcfSSatish Balay 
388*a7e14dcfSSatish Balay 		rznorm = PetscAbs(alpha) * rznorm;
389*a7e14dcfSSatish Balay 		if ((rznorm*rznorm + par*xnorm*xnorm)/(delta2) <= par) {
390*a7e14dcfSSatish Balay 		    rednc = 1;
391*a7e14dcfSSatish Balay 		}
392*a7e14dcfSSatish Balay 
393*a7e14dcfSSatish Balay 		/* Test for convergence */
394*a7e14dcfSSatish Balay 		if (p5 * rznorm*rznorm / delta2 <=
395*a7e14dcfSSatish Balay 		    rtol*(1.0-p5*rtol)*(par + rxnorm*rxnorm/delta2)) {
396*a7e14dcfSSatish Balay 		    info = 1;
397*a7e14dcfSSatish Balay 		} else if (info == 0 &&
398*a7e14dcfSSatish Balay 			   (p5*(par + rxnorm*rxnorm/delta2) <= atol/delta2)) {
399*a7e14dcfSSatish Balay 		    info = 2;
400*a7e14dcfSSatish Balay 		}
401*a7e14dcfSSatish Balay 	    }
402*a7e14dcfSSatish Balay 
403*a7e14dcfSSatish Balay 	    /* Compute the Newton correction parc to par. */
404*a7e14dcfSSatish Balay 	    if (xnorm == 0) {
405*a7e14dcfSSatish Balay 		parc = -par;
406*a7e14dcfSSatish Balay 	    } else {
407*a7e14dcfSSatish Balay 		BLAScopy_(&blasn, x, &blas1, wa2, &blas1);
408*a7e14dcfSSatish Balay 		CHKMEMQ;
409*a7e14dcfSSatish Balay 		temp = 1.0/xnorm;
410*a7e14dcfSSatish Balay 		BLASscal_(&blasn, &temp, wa2, &blas1);
411*a7e14dcfSSatish Balay 		CHKMEMQ;
412*a7e14dcfSSatish Balay 		LAPACKtrtrs_("U","T","N",&blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo);
413*a7e14dcfSSatish Balay 		CHKMEMQ;
414*a7e14dcfSSatish Balay 		temp = BLASnrm2_(&blasn, wa2, &blas1);
415*a7e14dcfSSatish Balay 		parc = (xnorm - delta)/(delta*temp*temp);
416*a7e14dcfSSatish Balay 	    }
417*a7e14dcfSSatish Balay 
418*a7e14dcfSSatish Balay 	    /* update parl or paru */
419*a7e14dcfSSatish Balay 	    if (xnorm > delta) {
420*a7e14dcfSSatish Balay 		parl = PetscMax(parl, par);
421*a7e14dcfSSatish Balay 	    } else if (xnorm < delta) {
422*a7e14dcfSSatish Balay 		paru = PetscMin(paru, par);
423*a7e14dcfSSatish Balay 	    }
424*a7e14dcfSSatish Balay 	} else {
425*a7e14dcfSSatish Balay 	    /* Case 2: A + par*I is not pos. def. */
426*a7e14dcfSSatish Balay 
427*a7e14dcfSSatish Balay 	    /* Use the rank information from the Cholesky
428*a7e14dcfSSatish Balay 	       decomposition to update par. */
429*a7e14dcfSSatish Balay 
430*a7e14dcfSSatish Balay 	    if (indef > 1) {
431*a7e14dcfSSatish Balay 
432*a7e14dcfSSatish Balay 	        /* Restore column indef to A + par*I. */
433*a7e14dcfSSatish Balay 		iblas = indef - 1;
434*a7e14dcfSSatish Balay 		BLAScopy_(&iblas,&a[indef-1 + 0*lda],&blaslda,
435*a7e14dcfSSatish Balay 			  &a[0 + (indef-1)*lda],&blas1);
436*a7e14dcfSSatish Balay 		CHKMEMQ;
437*a7e14dcfSSatish Balay 		a[indef-1 + (indef-1)*lda] = wa1[indef-1] + par;
438*a7e14dcfSSatish Balay 
439*a7e14dcfSSatish Balay 		/* compute parc. */
440*a7e14dcfSSatish Balay 
441*a7e14dcfSSatish Balay 		BLAScopy_(&iblas,&a[0 + (indef-1)*lda], &blas1, wa2, &blas1);
442*a7e14dcfSSatish Balay 		CHKMEMQ;
443*a7e14dcfSSatish Balay 		LAPACKtrtrs_("U","T","N",&iblas,&blas1,a,&blaslda,wa2,&blasn,&blasinfo);
444*a7e14dcfSSatish Balay 		CHKMEMQ;
445*a7e14dcfSSatish Balay 		BLAScopy_(&iblas,wa2,&blas1,&a[0 + (indef-1)*lda],&blas1);
446*a7e14dcfSSatish Balay 		CHKMEMQ;
447*a7e14dcfSSatish Balay 		temp = BLASnrm2_(&iblas,&a[0 + (indef-1)*lda],&blas1);
448*a7e14dcfSSatish Balay 		CHKMEMQ;
449*a7e14dcfSSatish Balay 		a[indef-1 + (indef-1)*lda] -= temp*temp;
450*a7e14dcfSSatish Balay 		LAPACKtrtrs_("U","N","N",&iblas,&blas1,a,&blaslda,wa2,&blasn,&blasinfo);
451*a7e14dcfSSatish Balay 		CHKMEMQ;
452*a7e14dcfSSatish Balay 	    }
453*a7e14dcfSSatish Balay 
454*a7e14dcfSSatish Balay 	    wa2[indef-1] = -1.0;
455*a7e14dcfSSatish Balay 	    iblas = indef;
456*a7e14dcfSSatish Balay 	    temp = BLASnrm2_(&iblas,wa2,&blas1);
457*a7e14dcfSSatish Balay 	    parc = - a[indef-1 + (indef-1)*lda]/(temp*temp);
458*a7e14dcfSSatish Balay 	    pars = PetscMax(pars,par+parc);
459*a7e14dcfSSatish Balay 
460*a7e14dcfSSatish Balay 	    /* If necessary, increase paru slightly.
461*a7e14dcfSSatish Balay 	       This is needed because in some exceptional situations
462*a7e14dcfSSatish Balay 	       paru is the optimal value of par. */
463*a7e14dcfSSatish Balay 
464*a7e14dcfSSatish Balay 	    paru = PetscMax(paru, (1.0+rtol)*pars);
465*a7e14dcfSSatish Balay 	}
466*a7e14dcfSSatish Balay 
467*a7e14dcfSSatish Balay 	/* Use pars to update parl */
468*a7e14dcfSSatish Balay 	parl = PetscMax(parl,pars);
469*a7e14dcfSSatish Balay 
470*a7e14dcfSSatish Balay 	/* Test for termination. */
471*a7e14dcfSSatish Balay 	if (info == 0) {
472*a7e14dcfSSatish Balay 	    if (iter == itmax) info=4;
473*a7e14dcfSSatish Balay 	    if (paru <= (1.0+p5*rtol)*pars) info=3;
474*a7e14dcfSSatish Balay 	    if (paru == 0.0) info = 2;
475*a7e14dcfSSatish Balay 	}
476*a7e14dcfSSatish Balay 
477*a7e14dcfSSatish Balay 	/* If exiting, store the best approximation and restore
478*a7e14dcfSSatish Balay 	   the upper triangle of A. */
479*a7e14dcfSSatish Balay 
480*a7e14dcfSSatish Balay 	if (info != 0) {
481*a7e14dcfSSatish Balay 
482*a7e14dcfSSatish Balay 	  /* Compute the best current estimates for x and f. */
483*a7e14dcfSSatish Balay 
484*a7e14dcfSSatish Balay 	    par = parf;
485*a7e14dcfSSatish Balay 	    f = -p5 * (rxnorm*rxnorm + par*xnorm*xnorm);
486*a7e14dcfSSatish Balay 	    if (rednc) {
487*a7e14dcfSSatish Balay 		f = -p5 * (rxnorm*rxnorm + par*delta*delta - rznorm*rznorm);
488*a7e14dcfSSatish Balay 		BLASaxpy_(&blasn, &alpha, z, &blas1, x, &blas1);
489*a7e14dcfSSatish Balay 		CHKMEMQ;
490*a7e14dcfSSatish Balay 	    }
491*a7e14dcfSSatish Balay 
492*a7e14dcfSSatish Balay 	    /* Restore the upper triangle of A */
493*a7e14dcfSSatish Balay 
494*a7e14dcfSSatish Balay 	    for (j = 0; j<n; j++) {
495*a7e14dcfSSatish Balay 		iblas = n - j - 1;
496*a7e14dcfSSatish Balay 		BLAScopy_(&iblas,&a[j+1 + j*lda],&blas1, &a[j + (j+1)*lda],&blaslda);
497*a7e14dcfSSatish Balay 		CHKMEMQ;
498*a7e14dcfSSatish Balay 	    }
499*a7e14dcfSSatish Balay 	    iblas = lda+1;
500*a7e14dcfSSatish Balay 	    BLAScopy_(&blasn,wa1,&blas1,a,&iblas);
501*a7e14dcfSSatish Balay 	    CHKMEMQ;
502*a7e14dcfSSatish Balay 	    break;
503*a7e14dcfSSatish Balay 	}
504*a7e14dcfSSatish Balay 	par = PetscMax(parl,par+parc);
505*a7e14dcfSSatish Balay     }
506*a7e14dcfSSatish Balay     *retpar = par;
507*a7e14dcfSSatish Balay     *retf = f;
508*a7e14dcfSSatish Balay     *retinfo = info;
509*a7e14dcfSSatish Balay     *retits = iter;
510*a7e14dcfSSatish Balay     CHKMEMQ;
511*a7e14dcfSSatish Balay     PetscFunctionReturn(0);
512*a7e14dcfSSatish Balay }
513