1*a7e14dcfSSatish Balay #include "petsc.h" 2*a7e14dcfSSatish Balay #include "petscblaslapack.h" 3*a7e14dcfSSatish Balay #include "taolapack.h" 4*a7e14dcfSSatish Balay 5*a7e14dcfSSatish Balay 6*a7e14dcfSSatish Balay 7*a7e14dcfSSatish Balay #undef __FUNCT__ 8*a7e14dcfSSatish Balay #define __FUNCT__ "estsv" 9*a7e14dcfSSatish Balay static PetscErrorCode estsv(PetscInt n, PetscReal *r, PetscInt ldr, PetscReal *svmin, PetscReal *z) { 10*a7e14dcfSSatish Balay 11*a7e14dcfSSatish Balay PetscBLASInt blas1=1, blasn=n, blasnmi, blasj, blasldr = ldr; 12*a7e14dcfSSatish Balay PetscInt i,j; 13*a7e14dcfSSatish Balay PetscReal e,temp,w,wm,ynorm,znorm,s,sm; 14*a7e14dcfSSatish Balay PetscFunctionBegin; 15*a7e14dcfSSatish Balay for (i=0;i<n;i++) { 16*a7e14dcfSSatish Balay z[i]=0.0; 17*a7e14dcfSSatish Balay } 18*a7e14dcfSSatish Balay 19*a7e14dcfSSatish Balay e = PetscAbs(r[0]); 20*a7e14dcfSSatish Balay if (e == 0.0) { 21*a7e14dcfSSatish Balay *svmin = 0.0; 22*a7e14dcfSSatish Balay z[0] = 1.0; 23*a7e14dcfSSatish Balay } else { 24*a7e14dcfSSatish Balay /* Solve R'*y = e */ 25*a7e14dcfSSatish Balay for (i=0;i<n;i++) { 26*a7e14dcfSSatish Balay 27*a7e14dcfSSatish Balay /* Scale y. The scaling factor (0.01) reduces the number of scalings */ 28*a7e14dcfSSatish Balay if (z[i] >= 0.0) 29*a7e14dcfSSatish Balay e=-PetscAbs(e); 30*a7e14dcfSSatish Balay else 31*a7e14dcfSSatish Balay e = PetscAbs(e); 32*a7e14dcfSSatish Balay 33*a7e14dcfSSatish Balay if (PetscAbs(e - z[i]) > PetscAbs(r[i + ldr*i])) { 34*a7e14dcfSSatish Balay temp = PetscMin(0.01,PetscAbs(r[i + ldr*i]))/ 35*a7e14dcfSSatish Balay PetscAbs(e-z[i]); 36*a7e14dcfSSatish Balay BLASscal_(&blasn, &temp, z, &blas1); 37*a7e14dcfSSatish Balay e = temp*e; 38*a7e14dcfSSatish Balay } 39*a7e14dcfSSatish Balay 40*a7e14dcfSSatish Balay /* Determine the two possible choices of y[i] */ 41*a7e14dcfSSatish Balay if (r[i + ldr*i] == 0.0) 42*a7e14dcfSSatish Balay w = wm = 1.0; 43*a7e14dcfSSatish Balay else { 44*a7e14dcfSSatish Balay w = (e - z[i]) / r[i + ldr*i]; 45*a7e14dcfSSatish Balay wm = - (e + z[i]) / r[i + ldr*i]; 46*a7e14dcfSSatish Balay } 47*a7e14dcfSSatish Balay 48*a7e14dcfSSatish Balay /* Chose y[i] based on the predicted value of y[j] for j>i */ 49*a7e14dcfSSatish Balay s = PetscAbs(e - z[i]); 50*a7e14dcfSSatish Balay sm = PetscAbs(e + z[i]); 51*a7e14dcfSSatish Balay for (j=i+1;j<n;j++) { 52*a7e14dcfSSatish Balay sm += PetscAbs(z[j] + wm * r[i + ldr*j]); 53*a7e14dcfSSatish Balay } 54*a7e14dcfSSatish Balay if (i < n-1) { 55*a7e14dcfSSatish Balay blasnmi = n-i-1; 56*a7e14dcfSSatish Balay BLASaxpy_(&blasnmi, &w, &r[i + ldr*(i+1)], &blasldr, &z[i+1], &blas1); 57*a7e14dcfSSatish Balay s += BLASasum_(&blasnmi, &z[i+1], &blas1); 58*a7e14dcfSSatish Balay } 59*a7e14dcfSSatish Balay if (s < sm) { 60*a7e14dcfSSatish Balay temp = wm - w; 61*a7e14dcfSSatish Balay w = wm; 62*a7e14dcfSSatish Balay if (i < n-1) { 63*a7e14dcfSSatish Balay BLASaxpy_(&blasnmi, &temp, &r[i + ldr*(i+1)], &blasldr, &z[i+1], &blas1); 64*a7e14dcfSSatish Balay } 65*a7e14dcfSSatish Balay } 66*a7e14dcfSSatish Balay z[i] = w; 67*a7e14dcfSSatish Balay } 68*a7e14dcfSSatish Balay 69*a7e14dcfSSatish Balay ynorm = BLASnrm2_(&blasn, z, &blas1); 70*a7e14dcfSSatish Balay 71*a7e14dcfSSatish Balay /* Solve R*z = y */ 72*a7e14dcfSSatish Balay for (j=n-1; j>=0; j--) { 73*a7e14dcfSSatish Balay 74*a7e14dcfSSatish Balay /* Scale z */ 75*a7e14dcfSSatish Balay 76*a7e14dcfSSatish Balay if (PetscAbs(z[j]) > PetscAbs(r[j + ldr*j])) { 77*a7e14dcfSSatish Balay temp = PetscMin(0.01, PetscAbs(r[j + ldr*j] / z[j])); 78*a7e14dcfSSatish Balay BLASscal_(&blasn, &temp, z, &blas1); 79*a7e14dcfSSatish Balay ynorm *=temp; 80*a7e14dcfSSatish Balay } 81*a7e14dcfSSatish Balay if (r[j + ldr*j] == 0) { 82*a7e14dcfSSatish Balay z[j] = 1.0; 83*a7e14dcfSSatish Balay } else { 84*a7e14dcfSSatish Balay z[j] = z[j] / r[j + ldr*j]; 85*a7e14dcfSSatish Balay } 86*a7e14dcfSSatish Balay temp = -z[j]; 87*a7e14dcfSSatish Balay blasj=j; 88*a7e14dcfSSatish Balay BLASaxpy_(&blasj,&temp,&r[0+ldr*j],&blas1,z,&blas1); 89*a7e14dcfSSatish Balay } 90*a7e14dcfSSatish Balay 91*a7e14dcfSSatish Balay /* Compute svmin and normalize z */ 92*a7e14dcfSSatish Balay znorm = 1.0 / BLASnrm2_(&blasn, z, &blas1); 93*a7e14dcfSSatish Balay *svmin = ynorm*znorm; 94*a7e14dcfSSatish Balay BLASscal_(&blasn, &znorm, z, &blas1); 95*a7e14dcfSSatish Balay 96*a7e14dcfSSatish Balay } 97*a7e14dcfSSatish Balay 98*a7e14dcfSSatish Balay PetscFunctionReturn(0); 99*a7e14dcfSSatish Balay } 100*a7e14dcfSSatish Balay 101*a7e14dcfSSatish Balay 102*a7e14dcfSSatish Balay 103*a7e14dcfSSatish Balay /* 104*a7e14dcfSSatish Balay c *********** 105*a7e14dcfSSatish Balay c 106*a7e14dcfSSatish Balay c Subroutine dgqt 107*a7e14dcfSSatish Balay c 108*a7e14dcfSSatish Balay c Given an n by n symmetric matrix A, an n-vector b, and a 109*a7e14dcfSSatish Balay c positive number delta, this subroutine determines a vector 110*a7e14dcfSSatish Balay c x which approximately minimizes the quadratic function 111*a7e14dcfSSatish Balay c 112*a7e14dcfSSatish Balay c f(x) = (1/2)*x'*A*x + b'*x 113*a7e14dcfSSatish Balay c 114*a7e14dcfSSatish Balay c subject to the Euclidean norm constraint 115*a7e14dcfSSatish Balay c 116*a7e14dcfSSatish Balay c norm(x) <= delta. 117*a7e14dcfSSatish Balay c 118*a7e14dcfSSatish Balay c This subroutine computes an approximation x and a Lagrange 119*a7e14dcfSSatish Balay c multiplier par such that either par is zero and 120*a7e14dcfSSatish Balay c 121*a7e14dcfSSatish Balay c norm(x) <= (1+rtol)*delta, 122*a7e14dcfSSatish Balay c 123*a7e14dcfSSatish Balay c or par is positive and 124*a7e14dcfSSatish Balay c 125*a7e14dcfSSatish Balay c abs(norm(x) - delta) <= rtol*delta. 126*a7e14dcfSSatish Balay c 127*a7e14dcfSSatish Balay c If xsol is the solution to the problem, the approximation x 128*a7e14dcfSSatish Balay c satisfies 129*a7e14dcfSSatish Balay c 130*a7e14dcfSSatish Balay c f(x) <= ((1 - rtol)**2)*f(xsol) 131*a7e14dcfSSatish Balay c 132*a7e14dcfSSatish Balay c The subroutine statement is 133*a7e14dcfSSatish Balay c 134*a7e14dcfSSatish Balay c subroutine dgqt(n,a,lda,b,delta,rtol,atol,itmax, 135*a7e14dcfSSatish Balay c par,f,x,info,z,wa1,wa2) 136*a7e14dcfSSatish Balay c 137*a7e14dcfSSatish Balay c where 138*a7e14dcfSSatish Balay c 139*a7e14dcfSSatish Balay c n is an integer variable. 140*a7e14dcfSSatish Balay c On entry n is the order of A. 141*a7e14dcfSSatish Balay c On exit n is unchanged. 142*a7e14dcfSSatish Balay c 143*a7e14dcfSSatish Balay c a is a double precision array of dimension (lda,n). 144*a7e14dcfSSatish Balay c On entry the full upper triangle of a must contain the 145*a7e14dcfSSatish Balay c full upper triangle of the symmetric matrix A. 146*a7e14dcfSSatish Balay c On exit the array contains the matrix A. 147*a7e14dcfSSatish Balay c 148*a7e14dcfSSatish Balay c lda is an integer variable. 149*a7e14dcfSSatish Balay c On entry lda is the leading dimension of the array a. 150*a7e14dcfSSatish Balay c On exit lda is unchanged. 151*a7e14dcfSSatish Balay c 152*a7e14dcfSSatish Balay c b is an double precision array of dimension n. 153*a7e14dcfSSatish Balay c On entry b specifies the linear term in the quadratic. 154*a7e14dcfSSatish Balay c On exit b is unchanged. 155*a7e14dcfSSatish Balay c 156*a7e14dcfSSatish Balay c delta is a double precision variable. 157*a7e14dcfSSatish Balay c On entry delta is a bound on the Euclidean norm of x. 158*a7e14dcfSSatish Balay c On exit delta is unchanged. 159*a7e14dcfSSatish Balay c 160*a7e14dcfSSatish Balay c rtol is a double precision variable. 161*a7e14dcfSSatish Balay c On entry rtol is the relative accuracy desired in the 162*a7e14dcfSSatish Balay c solution. Convergence occurs if 163*a7e14dcfSSatish Balay c 164*a7e14dcfSSatish Balay c f(x) <= ((1 - rtol)**2)*f(xsol) 165*a7e14dcfSSatish Balay c 166*a7e14dcfSSatish Balay c On exit rtol is unchanged. 167*a7e14dcfSSatish Balay c 168*a7e14dcfSSatish Balay c atol is a double precision variable. 169*a7e14dcfSSatish Balay c On entry atol is the absolute accuracy desired in the 170*a7e14dcfSSatish Balay c solution. Convergence occurs when 171*a7e14dcfSSatish Balay c 172*a7e14dcfSSatish Balay c norm(x) <= (1 + rtol)*delta 173*a7e14dcfSSatish Balay c 174*a7e14dcfSSatish Balay c max(-f(x),-f(xsol)) <= atol 175*a7e14dcfSSatish Balay c 176*a7e14dcfSSatish Balay c On exit atol is unchanged. 177*a7e14dcfSSatish Balay c 178*a7e14dcfSSatish Balay c itmax is an integer variable. 179*a7e14dcfSSatish Balay c On entry itmax specifies the maximum number of iterations. 180*a7e14dcfSSatish Balay c On exit itmax is unchanged. 181*a7e14dcfSSatish Balay c 182*a7e14dcfSSatish Balay c par is a double precision variable. 183*a7e14dcfSSatish Balay c On entry par is an initial estimate of the Lagrange 184*a7e14dcfSSatish Balay c multiplier for the constraint norm(x) <= delta. 185*a7e14dcfSSatish Balay c On exit par contains the final estimate of the multiplier. 186*a7e14dcfSSatish Balay c 187*a7e14dcfSSatish Balay c f is a double precision variable. 188*a7e14dcfSSatish Balay c On entry f need not be specified. 189*a7e14dcfSSatish Balay c On exit f is set to f(x) at the output x. 190*a7e14dcfSSatish Balay c 191*a7e14dcfSSatish Balay c x is a double precision array of dimension n. 192*a7e14dcfSSatish Balay c On entry x need not be specified. 193*a7e14dcfSSatish Balay c On exit x is set to the final estimate of the solution. 194*a7e14dcfSSatish Balay c 195*a7e14dcfSSatish Balay c info is an integer variable. 196*a7e14dcfSSatish Balay c On entry info need not be specified. 197*a7e14dcfSSatish Balay c On exit info is set as follows: 198*a7e14dcfSSatish Balay c 199*a7e14dcfSSatish Balay c info = 1 The function value f(x) has the relative 200*a7e14dcfSSatish Balay c accuracy specified by rtol. 201*a7e14dcfSSatish Balay c 202*a7e14dcfSSatish Balay c info = 2 The function value f(x) has the absolute 203*a7e14dcfSSatish Balay c accuracy specified by atol. 204*a7e14dcfSSatish Balay c 205*a7e14dcfSSatish Balay c info = 3 Rounding errors prevent further progress. 206*a7e14dcfSSatish Balay c On exit x is the best available approximation. 207*a7e14dcfSSatish Balay c 208*a7e14dcfSSatish Balay c info = 4 Failure to converge after itmax iterations. 209*a7e14dcfSSatish Balay c On exit x is the best available approximation. 210*a7e14dcfSSatish Balay c 211*a7e14dcfSSatish Balay c z is a double precision work array of dimension n. 212*a7e14dcfSSatish Balay c 213*a7e14dcfSSatish Balay c wa1 is a double precision work array of dimension n. 214*a7e14dcfSSatish Balay c 215*a7e14dcfSSatish Balay c wa2 is a double precision work array of dimension n. 216*a7e14dcfSSatish Balay c 217*a7e14dcfSSatish Balay c Subprograms called 218*a7e14dcfSSatish Balay c 219*a7e14dcfSSatish Balay c MINPACK-2 ...... destsv 220*a7e14dcfSSatish Balay c 221*a7e14dcfSSatish Balay c LAPACK ......... dpotrf 222*a7e14dcfSSatish Balay c 223*a7e14dcfSSatish Balay c Level 1 BLAS ... daxpy, dcopy, ddot, dnrm2, dscal 224*a7e14dcfSSatish Balay c 225*a7e14dcfSSatish Balay c Level 2 BLAS ... dtrmv, dtrsv 226*a7e14dcfSSatish Balay c 227*a7e14dcfSSatish Balay c MINPACK-2 Project. October 1993. 228*a7e14dcfSSatish Balay c Argonne National Laboratory and University of Minnesota. 229*a7e14dcfSSatish Balay c Brett M. Averick, Richard Carter, and Jorge J. More' 230*a7e14dcfSSatish Balay c 231*a7e14dcfSSatish Balay c *********** 232*a7e14dcfSSatish Balay */ 233*a7e14dcfSSatish Balay 234*a7e14dcfSSatish Balay #undef __FUNCT__ 235*a7e14dcfSSatish Balay #define __FUNCT__ "gqt" 236*a7e14dcfSSatish Balay PetscErrorCode gqt(PetscInt n, PetscReal *a, PetscInt lda, PetscReal *b, 237*a7e14dcfSSatish Balay PetscReal delta, PetscReal rtol, PetscReal atol, 238*a7e14dcfSSatish Balay PetscInt itmax, PetscReal *retpar, PetscReal *retf, 239*a7e14dcfSSatish Balay PetscReal *x, PetscInt *retinfo, PetscInt *retits, 240*a7e14dcfSSatish Balay PetscReal *z, PetscReal *wa1, PetscReal *wa2) 241*a7e14dcfSSatish Balay { 242*a7e14dcfSSatish Balay PetscErrorCode ierr; 243*a7e14dcfSSatish Balay PetscReal f=0.0,p001=0.001,p5=0.5,minusone=-1,delta2=delta*delta; 244*a7e14dcfSSatish Balay PetscInt iter, j, rednc,info; 245*a7e14dcfSSatish Balay PetscBLASInt indef; 246*a7e14dcfSSatish Balay PetscBLASInt blas1=1, blasn=n, iblas, blaslda = lda,blasldap1=lda+1,blasinfo; 247*a7e14dcfSSatish Balay PetscReal alpha, anorm, bnorm, parc, parf, parl, pars, par=*retpar, 248*a7e14dcfSSatish Balay paru, prod, rxnorm, rznorm=0.0, temp, xnorm; 249*a7e14dcfSSatish Balay 250*a7e14dcfSSatish Balay PetscFunctionBegin; 251*a7e14dcfSSatish Balay iter = 0; 252*a7e14dcfSSatish Balay parf = 0.0; 253*a7e14dcfSSatish Balay xnorm = 0.0; 254*a7e14dcfSSatish Balay rxnorm = 0.0; 255*a7e14dcfSSatish Balay rednc = 0; 256*a7e14dcfSSatish Balay for (j=0; j<n; j++) { 257*a7e14dcfSSatish Balay x[j] = 0.0; 258*a7e14dcfSSatish Balay z[j] = 0.0; 259*a7e14dcfSSatish Balay } 260*a7e14dcfSSatish Balay 261*a7e14dcfSSatish Balay /* Copy the diagonal and save A in its lower triangle */ 262*a7e14dcfSSatish Balay BLAScopy_(&blasn,a,&blasldap1, wa1, &blas1); 263*a7e14dcfSSatish Balay CHKMEMQ; 264*a7e14dcfSSatish Balay for (j=0;j<n-1;j++) { 265*a7e14dcfSSatish Balay iblas = n - j - 1; 266*a7e14dcfSSatish Balay BLAScopy_(&iblas,&a[j + lda*(j+1)], &blaslda, &a[j+1 + lda*j], &blas1); 267*a7e14dcfSSatish Balay CHKMEMQ; 268*a7e14dcfSSatish Balay } 269*a7e14dcfSSatish Balay 270*a7e14dcfSSatish Balay /* Calculate the l1-norm of A, the Gershgorin row sums, and the 271*a7e14dcfSSatish Balay l2-norm of b */ 272*a7e14dcfSSatish Balay anorm = 0.0; 273*a7e14dcfSSatish Balay for (j=0;j<n;j++) { 274*a7e14dcfSSatish Balay wa2[j] = BLASasum_(&blasn, &a[0 + lda*j], &blas1); 275*a7e14dcfSSatish Balay CHKMEMQ; 276*a7e14dcfSSatish Balay anorm = PetscMax(anorm,wa2[j]); 277*a7e14dcfSSatish Balay } 278*a7e14dcfSSatish Balay for (j=0;j<n;j++) { 279*a7e14dcfSSatish Balay wa2[j] = wa2[j] - PetscAbs(wa1[j]); 280*a7e14dcfSSatish Balay } 281*a7e14dcfSSatish Balay bnorm = BLASnrm2_(&blasn,b,&blas1); 282*a7e14dcfSSatish Balay CHKMEMQ; 283*a7e14dcfSSatish Balay /* Calculate a lower bound, pars, for the domain of the problem. 284*a7e14dcfSSatish Balay Also calculate an upper bound, paru, and a lower bound, parl, 285*a7e14dcfSSatish Balay for the Lagrange multiplier. */ 286*a7e14dcfSSatish Balay pars = parl = paru = -anorm; 287*a7e14dcfSSatish Balay for (j=0;j<n;j++) { 288*a7e14dcfSSatish Balay pars = PetscMax(pars, -wa1[j]); 289*a7e14dcfSSatish Balay parl = PetscMax(parl, wa1[j] + wa2[j]); 290*a7e14dcfSSatish Balay paru = PetscMax(paru, -wa1[j] + wa2[j]); 291*a7e14dcfSSatish Balay } 292*a7e14dcfSSatish Balay parl = PetscMax(bnorm/delta - parl,pars); 293*a7e14dcfSSatish Balay parl = PetscMax(0.0,parl); 294*a7e14dcfSSatish Balay paru = PetscMax(0.0, bnorm/delta + paru); 295*a7e14dcfSSatish Balay 296*a7e14dcfSSatish Balay /* If the input par lies outside of the interval (parl, paru), 297*a7e14dcfSSatish Balay set par to the closer endpoint. */ 298*a7e14dcfSSatish Balay 299*a7e14dcfSSatish Balay par = PetscMax(par,parl); 300*a7e14dcfSSatish Balay par = PetscMin(par,paru); 301*a7e14dcfSSatish Balay 302*a7e14dcfSSatish Balay /* Special case: parl == paru */ 303*a7e14dcfSSatish Balay paru = PetscMax(paru, (1.0 + rtol)*parl); 304*a7e14dcfSSatish Balay 305*a7e14dcfSSatish Balay /* Beginning of an iteration */ 306*a7e14dcfSSatish Balay 307*a7e14dcfSSatish Balay info = 0; 308*a7e14dcfSSatish Balay for (iter=1;iter<=itmax;iter++) { 309*a7e14dcfSSatish Balay 310*a7e14dcfSSatish Balay /* Safeguard par */ 311*a7e14dcfSSatish Balay if (par <= pars && paru > 0) { 312*a7e14dcfSSatish Balay par = PetscMax(p001, PetscSqrtScalar(parl/paru)) * paru; 313*a7e14dcfSSatish Balay } 314*a7e14dcfSSatish Balay 315*a7e14dcfSSatish Balay /* Copy the lower triangle of A into its upper triangle and 316*a7e14dcfSSatish Balay compute A + par*I */ 317*a7e14dcfSSatish Balay 318*a7e14dcfSSatish Balay for (j=0;j<n-1;j++) { 319*a7e14dcfSSatish Balay iblas = n - j - 1; 320*a7e14dcfSSatish Balay BLAScopy_(&iblas,&a[j+1 + j*lda], &blas1, 321*a7e14dcfSSatish Balay &a[j + (j+1)*lda], &blaslda); 322*a7e14dcfSSatish Balay CHKMEMQ; 323*a7e14dcfSSatish Balay } 324*a7e14dcfSSatish Balay for (j=0;j<n;j++) { 325*a7e14dcfSSatish Balay a[j + j*lda] = wa1[j] + par; 326*a7e14dcfSSatish Balay } 327*a7e14dcfSSatish Balay 328*a7e14dcfSSatish Balay /* Attempt the Cholesky factorization of A without referencing 329*a7e14dcfSSatish Balay the lower triangular part. */ 330*a7e14dcfSSatish Balay LAPACKpotrf_("U",&blasn,a,&blaslda,&indef); 331*a7e14dcfSSatish Balay CHKMEMQ; 332*a7e14dcfSSatish Balay 333*a7e14dcfSSatish Balay /* Case 1: A + par*I is pos. def. */ 334*a7e14dcfSSatish Balay if (indef == 0) { 335*a7e14dcfSSatish Balay 336*a7e14dcfSSatish Balay /* Compute an approximate solution x and save the 337*a7e14dcfSSatish Balay last value of par with A + par*I pos. def. */ 338*a7e14dcfSSatish Balay 339*a7e14dcfSSatish Balay parf = par; 340*a7e14dcfSSatish Balay BLAScopy_(&blasn, b, &blas1, wa2, &blas1); 341*a7e14dcfSSatish Balay CHKMEMQ; 342*a7e14dcfSSatish Balay LAPACKtrtrs_("U","T","N",&blasn,&blas1,a,&blaslda,wa2,&blasn,&blasinfo); 343*a7e14dcfSSatish Balay CHKMEMQ; 344*a7e14dcfSSatish Balay rxnorm = BLASnrm2_(&blasn, wa2, &blas1); 345*a7e14dcfSSatish Balay LAPACKtrtrs_("U","N","N",&blasn,&blas1,a,&blaslda,wa2,&blasn,&blasinfo); 346*a7e14dcfSSatish Balay CHKMEMQ; 347*a7e14dcfSSatish Balay BLAScopy_(&blasn, wa2, &blas1, x, &blas1); 348*a7e14dcfSSatish Balay CHKMEMQ; 349*a7e14dcfSSatish Balay BLASscal_(&blasn, &minusone, x, &blas1); 350*a7e14dcfSSatish Balay CHKMEMQ; 351*a7e14dcfSSatish Balay xnorm = BLASnrm2_(&blasn, x, &blas1); 352*a7e14dcfSSatish Balay CHKMEMQ; 353*a7e14dcfSSatish Balay 354*a7e14dcfSSatish Balay /* Test for convergence */ 355*a7e14dcfSSatish Balay if (PetscAbs(xnorm - delta) <= rtol*delta || 356*a7e14dcfSSatish Balay (par == 0 && xnorm <= (1.0+rtol)*delta)) { 357*a7e14dcfSSatish Balay info = 1; 358*a7e14dcfSSatish Balay } 359*a7e14dcfSSatish Balay 360*a7e14dcfSSatish Balay /* Compute a direction of negative curvature and use this 361*a7e14dcfSSatish Balay information to improve pars. */ 362*a7e14dcfSSatish Balay 363*a7e14dcfSSatish Balay iblas=blasn*blasn; 364*a7e14dcfSSatish Balay 365*a7e14dcfSSatish Balay ierr = estsv(n,a,lda,&rznorm,z); CHKERRQ(ierr); 366*a7e14dcfSSatish Balay CHKMEMQ; 367*a7e14dcfSSatish Balay pars = PetscMax(pars, par-rznorm*rznorm); 368*a7e14dcfSSatish Balay 369*a7e14dcfSSatish Balay /* Compute a negative curvature solution of the form 370*a7e14dcfSSatish Balay x + alpha*z, where norm(x+alpha*z)==delta */ 371*a7e14dcfSSatish Balay 372*a7e14dcfSSatish Balay rednc = 0; 373*a7e14dcfSSatish Balay if (xnorm < delta) { 374*a7e14dcfSSatish Balay 375*a7e14dcfSSatish Balay /* Compute alpha */ 376*a7e14dcfSSatish Balay prod = BLASdot_(&blasn, z, &blas1, x, &blas1) / delta; 377*a7e14dcfSSatish Balay temp = (delta - xnorm)*((delta + xnorm)/delta); 378*a7e14dcfSSatish Balay alpha = temp/(PetscAbs(prod) + PetscSqrtScalar(prod*prod + temp/delta)); 379*a7e14dcfSSatish Balay if (prod >= 0) 380*a7e14dcfSSatish Balay alpha = PetscAbs(alpha); 381*a7e14dcfSSatish Balay else 382*a7e14dcfSSatish Balay alpha =-PetscAbs(alpha); 383*a7e14dcfSSatish Balay 384*a7e14dcfSSatish Balay 385*a7e14dcfSSatish Balay /* Test to decide if the negative curvature step 386*a7e14dcfSSatish Balay produces a larger reduction than with z=0 */ 387*a7e14dcfSSatish Balay 388*a7e14dcfSSatish Balay rznorm = PetscAbs(alpha) * rznorm; 389*a7e14dcfSSatish Balay if ((rznorm*rznorm + par*xnorm*xnorm)/(delta2) <= par) { 390*a7e14dcfSSatish Balay rednc = 1; 391*a7e14dcfSSatish Balay } 392*a7e14dcfSSatish Balay 393*a7e14dcfSSatish Balay /* Test for convergence */ 394*a7e14dcfSSatish Balay if (p5 * rznorm*rznorm / delta2 <= 395*a7e14dcfSSatish Balay rtol*(1.0-p5*rtol)*(par + rxnorm*rxnorm/delta2)) { 396*a7e14dcfSSatish Balay info = 1; 397*a7e14dcfSSatish Balay } else if (info == 0 && 398*a7e14dcfSSatish Balay (p5*(par + rxnorm*rxnorm/delta2) <= atol/delta2)) { 399*a7e14dcfSSatish Balay info = 2; 400*a7e14dcfSSatish Balay } 401*a7e14dcfSSatish Balay } 402*a7e14dcfSSatish Balay 403*a7e14dcfSSatish Balay /* Compute the Newton correction parc to par. */ 404*a7e14dcfSSatish Balay if (xnorm == 0) { 405*a7e14dcfSSatish Balay parc = -par; 406*a7e14dcfSSatish Balay } else { 407*a7e14dcfSSatish Balay BLAScopy_(&blasn, x, &blas1, wa2, &blas1); 408*a7e14dcfSSatish Balay CHKMEMQ; 409*a7e14dcfSSatish Balay temp = 1.0/xnorm; 410*a7e14dcfSSatish Balay BLASscal_(&blasn, &temp, wa2, &blas1); 411*a7e14dcfSSatish Balay CHKMEMQ; 412*a7e14dcfSSatish Balay LAPACKtrtrs_("U","T","N",&blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo); 413*a7e14dcfSSatish Balay CHKMEMQ; 414*a7e14dcfSSatish Balay temp = BLASnrm2_(&blasn, wa2, &blas1); 415*a7e14dcfSSatish Balay parc = (xnorm - delta)/(delta*temp*temp); 416*a7e14dcfSSatish Balay } 417*a7e14dcfSSatish Balay 418*a7e14dcfSSatish Balay /* update parl or paru */ 419*a7e14dcfSSatish Balay if (xnorm > delta) { 420*a7e14dcfSSatish Balay parl = PetscMax(parl, par); 421*a7e14dcfSSatish Balay } else if (xnorm < delta) { 422*a7e14dcfSSatish Balay paru = PetscMin(paru, par); 423*a7e14dcfSSatish Balay } 424*a7e14dcfSSatish Balay } else { 425*a7e14dcfSSatish Balay /* Case 2: A + par*I is not pos. def. */ 426*a7e14dcfSSatish Balay 427*a7e14dcfSSatish Balay /* Use the rank information from the Cholesky 428*a7e14dcfSSatish Balay decomposition to update par. */ 429*a7e14dcfSSatish Balay 430*a7e14dcfSSatish Balay if (indef > 1) { 431*a7e14dcfSSatish Balay 432*a7e14dcfSSatish Balay /* Restore column indef to A + par*I. */ 433*a7e14dcfSSatish Balay iblas = indef - 1; 434*a7e14dcfSSatish Balay BLAScopy_(&iblas,&a[indef-1 + 0*lda],&blaslda, 435*a7e14dcfSSatish Balay &a[0 + (indef-1)*lda],&blas1); 436*a7e14dcfSSatish Balay CHKMEMQ; 437*a7e14dcfSSatish Balay a[indef-1 + (indef-1)*lda] = wa1[indef-1] + par; 438*a7e14dcfSSatish Balay 439*a7e14dcfSSatish Balay /* compute parc. */ 440*a7e14dcfSSatish Balay 441*a7e14dcfSSatish Balay BLAScopy_(&iblas,&a[0 + (indef-1)*lda], &blas1, wa2, &blas1); 442*a7e14dcfSSatish Balay CHKMEMQ; 443*a7e14dcfSSatish Balay LAPACKtrtrs_("U","T","N",&iblas,&blas1,a,&blaslda,wa2,&blasn,&blasinfo); 444*a7e14dcfSSatish Balay CHKMEMQ; 445*a7e14dcfSSatish Balay BLAScopy_(&iblas,wa2,&blas1,&a[0 + (indef-1)*lda],&blas1); 446*a7e14dcfSSatish Balay CHKMEMQ; 447*a7e14dcfSSatish Balay temp = BLASnrm2_(&iblas,&a[0 + (indef-1)*lda],&blas1); 448*a7e14dcfSSatish Balay CHKMEMQ; 449*a7e14dcfSSatish Balay a[indef-1 + (indef-1)*lda] -= temp*temp; 450*a7e14dcfSSatish Balay LAPACKtrtrs_("U","N","N",&iblas,&blas1,a,&blaslda,wa2,&blasn,&blasinfo); 451*a7e14dcfSSatish Balay CHKMEMQ; 452*a7e14dcfSSatish Balay } 453*a7e14dcfSSatish Balay 454*a7e14dcfSSatish Balay wa2[indef-1] = -1.0; 455*a7e14dcfSSatish Balay iblas = indef; 456*a7e14dcfSSatish Balay temp = BLASnrm2_(&iblas,wa2,&blas1); 457*a7e14dcfSSatish Balay parc = - a[indef-1 + (indef-1)*lda]/(temp*temp); 458*a7e14dcfSSatish Balay pars = PetscMax(pars,par+parc); 459*a7e14dcfSSatish Balay 460*a7e14dcfSSatish Balay /* If necessary, increase paru slightly. 461*a7e14dcfSSatish Balay This is needed because in some exceptional situations 462*a7e14dcfSSatish Balay paru is the optimal value of par. */ 463*a7e14dcfSSatish Balay 464*a7e14dcfSSatish Balay paru = PetscMax(paru, (1.0+rtol)*pars); 465*a7e14dcfSSatish Balay } 466*a7e14dcfSSatish Balay 467*a7e14dcfSSatish Balay /* Use pars to update parl */ 468*a7e14dcfSSatish Balay parl = PetscMax(parl,pars); 469*a7e14dcfSSatish Balay 470*a7e14dcfSSatish Balay /* Test for termination. */ 471*a7e14dcfSSatish Balay if (info == 0) { 472*a7e14dcfSSatish Balay if (iter == itmax) info=4; 473*a7e14dcfSSatish Balay if (paru <= (1.0+p5*rtol)*pars) info=3; 474*a7e14dcfSSatish Balay if (paru == 0.0) info = 2; 475*a7e14dcfSSatish Balay } 476*a7e14dcfSSatish Balay 477*a7e14dcfSSatish Balay /* If exiting, store the best approximation and restore 478*a7e14dcfSSatish Balay the upper triangle of A. */ 479*a7e14dcfSSatish Balay 480*a7e14dcfSSatish Balay if (info != 0) { 481*a7e14dcfSSatish Balay 482*a7e14dcfSSatish Balay /* Compute the best current estimates for x and f. */ 483*a7e14dcfSSatish Balay 484*a7e14dcfSSatish Balay par = parf; 485*a7e14dcfSSatish Balay f = -p5 * (rxnorm*rxnorm + par*xnorm*xnorm); 486*a7e14dcfSSatish Balay if (rednc) { 487*a7e14dcfSSatish Balay f = -p5 * (rxnorm*rxnorm + par*delta*delta - rznorm*rznorm); 488*a7e14dcfSSatish Balay BLASaxpy_(&blasn, &alpha, z, &blas1, x, &blas1); 489*a7e14dcfSSatish Balay CHKMEMQ; 490*a7e14dcfSSatish Balay } 491*a7e14dcfSSatish Balay 492*a7e14dcfSSatish Balay /* Restore the upper triangle of A */ 493*a7e14dcfSSatish Balay 494*a7e14dcfSSatish Balay for (j = 0; j<n; j++) { 495*a7e14dcfSSatish Balay iblas = n - j - 1; 496*a7e14dcfSSatish Balay BLAScopy_(&iblas,&a[j+1 + j*lda],&blas1, &a[j + (j+1)*lda],&blaslda); 497*a7e14dcfSSatish Balay CHKMEMQ; 498*a7e14dcfSSatish Balay } 499*a7e14dcfSSatish Balay iblas = lda+1; 500*a7e14dcfSSatish Balay BLAScopy_(&blasn,wa1,&blas1,a,&iblas); 501*a7e14dcfSSatish Balay CHKMEMQ; 502*a7e14dcfSSatish Balay break; 503*a7e14dcfSSatish Balay } 504*a7e14dcfSSatish Balay par = PetscMax(parl,par+parc); 505*a7e14dcfSSatish Balay } 506*a7e14dcfSSatish Balay *retpar = par; 507*a7e14dcfSSatish Balay *retf = f; 508*a7e14dcfSSatish Balay *retinfo = info; 509*a7e14dcfSSatish Balay *retits = iter; 510*a7e14dcfSSatish Balay CHKMEMQ; 511*a7e14dcfSSatish Balay PetscFunctionReturn(0); 512*a7e14dcfSSatish Balay } 513