1560360afSLisandro Dalcin #include <petscsys.h> 2aaa7dc30SBarry Smith #include <petscblaslapack.h> 3a7e14dcfSSatish Balay 46c23d075SBarry Smith static PetscErrorCode estsv(PetscInt n, PetscReal *r, PetscInt ldr, PetscReal *svmin, PetscReal *z) 56c23d075SBarry Smith { 61cfd2cc8SBarry Smith PetscBLASInt blas1=1, blasn, blasnmi, blasj, blasldr; 7a7e14dcfSSatish Balay PetscInt i,j; 8a7e14dcfSSatish Balay PetscReal e,temp,w,wm,ynorm,znorm,s,sm; 91cfd2cc8SBarry Smith PetscErrorCode ierr; 106c23d075SBarry Smith 11a7e14dcfSSatish Balay PetscFunctionBegin; 121cfd2cc8SBarry Smith ierr = PetscBLASIntCast(n,&blasn);CHKERRQ(ierr); 131cfd2cc8SBarry Smith ierr = PetscBLASIntCast(ldr,&blasldr);CHKERRQ(ierr); 14a7e14dcfSSatish Balay for (i=0;i<n;i++) { 15a7e14dcfSSatish Balay z[i]=0.0; 16a7e14dcfSSatish Balay } 17a7e14dcfSSatish Balay e = PetscAbs(r[0]); 18a7e14dcfSSatish Balay if (e == 0.0) { 19a7e14dcfSSatish Balay *svmin = 0.0; 20a7e14dcfSSatish Balay z[0] = 1.0; 21a7e14dcfSSatish Balay } else { 22a7e14dcfSSatish Balay /* Solve R'*y = e */ 23a7e14dcfSSatish Balay for (i=0;i<n;i++) { 24a7e14dcfSSatish Balay /* Scale y. The scaling factor (0.01) reduces the number of scalings */ 256c23d075SBarry Smith if (z[i] >= 0.0) e =-PetscAbs(e); 266c23d075SBarry Smith else e = PetscAbs(e); 27a7e14dcfSSatish Balay 28a7e14dcfSSatish Balay if (PetscAbs(e - z[i]) > PetscAbs(r[i + ldr*i])) { 296c23d075SBarry Smith temp = PetscMin(0.01,PetscAbs(r[i + ldr*i]))/PetscAbs(e-z[i]); 300cbffdbaSBarry Smith PetscStackCallBLAS("BLASscal",BLASscal_(&blasn, &temp, z, &blas1)); 31a7e14dcfSSatish Balay e = temp*e; 32a7e14dcfSSatish Balay } 33a7e14dcfSSatish Balay 34a7e14dcfSSatish Balay /* Determine the two possible choices of y[i] */ 356c23d075SBarry Smith if (r[i + ldr*i] == 0.0) { 36a7e14dcfSSatish Balay w = wm = 1.0; 376c23d075SBarry Smith } else { 38a7e14dcfSSatish Balay w = (e - z[i]) / r[i + ldr*i]; 39a7e14dcfSSatish Balay wm = - (e + z[i]) / r[i + ldr*i]; 40a7e14dcfSSatish Balay } 41a7e14dcfSSatish Balay 42a7e14dcfSSatish Balay /* Chose y[i] based on the predicted value of y[j] for j>i */ 43a7e14dcfSSatish Balay s = PetscAbs(e - z[i]); 44a7e14dcfSSatish Balay sm = PetscAbs(e + z[i]); 45a7e14dcfSSatish Balay for (j=i+1;j<n;j++) { 46a7e14dcfSSatish Balay sm += PetscAbs(z[j] + wm * r[i + ldr*j]); 47a7e14dcfSSatish Balay } 48a7e14dcfSSatish Balay if (i < n-1) { 491cfd2cc8SBarry Smith ierr = PetscBLASIntCast(n-i-1,&blasnmi);CHKERRQ(ierr); 500cbffdbaSBarry Smith PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&blasnmi, &w, &r[i + ldr*(i+1)], &blasldr, &z[i+1], &blas1)); 5173cf7048SBarry Smith PetscStackCallBLAS("BLASasum",s += BLASasum_(&blasnmi, &z[i+1], &blas1)); 52a7e14dcfSSatish Balay } 53a7e14dcfSSatish Balay if (s < sm) { 54a7e14dcfSSatish Balay temp = wm - w; 55a7e14dcfSSatish Balay w = wm; 56a7e14dcfSSatish Balay if (i < n-1) { 570cbffdbaSBarry Smith PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&blasnmi, &temp, &r[i + ldr*(i+1)], &blasldr, &z[i+1], &blas1)); 58a7e14dcfSSatish Balay } 59a7e14dcfSSatish Balay } 60a7e14dcfSSatish Balay z[i] = w; 61a7e14dcfSSatish Balay } 62a7e14dcfSSatish Balay 6373cf7048SBarry Smith PetscStackCallBLAS("BLASnrm2",ynorm = BLASnrm2_(&blasn, z, &blas1)); 64a7e14dcfSSatish Balay 65a7e14dcfSSatish Balay /* Solve R*z = y */ 66a7e14dcfSSatish Balay for (j=n-1; j>=0; j--) { 67a7e14dcfSSatish Balay /* Scale z */ 68a7e14dcfSSatish Balay if (PetscAbs(z[j]) > PetscAbs(r[j + ldr*j])) { 69a7e14dcfSSatish Balay temp = PetscMin(0.01, PetscAbs(r[j + ldr*j] / z[j])); 700cbffdbaSBarry Smith PetscStackCallBLAS("BLASscal",BLASscal_(&blasn, &temp, z, &blas1)); 71a7e14dcfSSatish Balay ynorm *=temp; 72a7e14dcfSSatish Balay } 73a7e14dcfSSatish Balay if (r[j + ldr*j] == 0) { 74a7e14dcfSSatish Balay z[j] = 1.0; 75a7e14dcfSSatish Balay } else { 76a7e14dcfSSatish Balay z[j] = z[j] / r[j + ldr*j]; 77a7e14dcfSSatish Balay } 78a7e14dcfSSatish Balay temp = -z[j]; 791cfd2cc8SBarry Smith ierr = PetscBLASIntCast(j,&blasj);CHKERRQ(ierr); 800cbffdbaSBarry Smith PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&blasj,&temp,&r[0+ldr*j],&blas1,z,&blas1)); 81a7e14dcfSSatish Balay } 82a7e14dcfSSatish Balay 83a7e14dcfSSatish Balay /* Compute svmin and normalize z */ 8473cf7048SBarry Smith PetscStackCallBLAS("BLASnrm2",znorm = 1.0 / BLASnrm2_(&blasn, z, &blas1)); 85a7e14dcfSSatish Balay *svmin = ynorm*znorm; 860cbffdbaSBarry Smith PetscStackCallBLAS("BLASscal",BLASscal_(&blasn, &znorm, z, &blas1)); 87a7e14dcfSSatish Balay } 88a7e14dcfSSatish Balay PetscFunctionReturn(0); 89a7e14dcfSSatish Balay } 90a7e14dcfSSatish Balay 91a7e14dcfSSatish Balay /* 92a7e14dcfSSatish Balay c *********** 93a7e14dcfSSatish Balay c 94691b26d3SBarry Smith c Subroutine gqt 95a7e14dcfSSatish Balay c 96a7e14dcfSSatish Balay c Given an n by n symmetric matrix A, an n-vector b, and a 97a7e14dcfSSatish Balay c positive number delta, this subroutine determines a vector 98a7e14dcfSSatish Balay c x which approximately minimizes the quadratic function 99a7e14dcfSSatish Balay c 100a7e14dcfSSatish Balay c f(x) = (1/2)*x'*A*x + b'*x 101a7e14dcfSSatish Balay c 102a7e14dcfSSatish Balay c subject to the Euclidean norm constraint 103a7e14dcfSSatish Balay c 104a7e14dcfSSatish Balay c norm(x) <= delta. 105a7e14dcfSSatish Balay c 106a7e14dcfSSatish Balay c This subroutine computes an approximation x and a Lagrange 107a7e14dcfSSatish Balay c multiplier par such that either par is zero and 108a7e14dcfSSatish Balay c 109a7e14dcfSSatish Balay c norm(x) <= (1+rtol)*delta, 110a7e14dcfSSatish Balay c 111a7e14dcfSSatish Balay c or par is positive and 112a7e14dcfSSatish Balay c 113a7e14dcfSSatish Balay c abs(norm(x) - delta) <= rtol*delta. 114a7e14dcfSSatish Balay c 115a7e14dcfSSatish Balay c If xsol is the solution to the problem, the approximation x 116a7e14dcfSSatish Balay c satisfies 117a7e14dcfSSatish Balay c 118a7e14dcfSSatish Balay c f(x) <= ((1 - rtol)**2)*f(xsol) 119a7e14dcfSSatish Balay c 120a7e14dcfSSatish Balay c The subroutine statement is 121a7e14dcfSSatish Balay c 122691b26d3SBarry Smith c subroutine gqt(n,a,lda,b,delta,rtol,atol,itmax, 123a7e14dcfSSatish Balay c par,f,x,info,z,wa1,wa2) 124a7e14dcfSSatish Balay c 125a7e14dcfSSatish Balay c where 126a7e14dcfSSatish Balay c 127a7e14dcfSSatish Balay c n is an integer variable. 128a7e14dcfSSatish Balay c On entry n is the order of A. 129a7e14dcfSSatish Balay c On exit n is unchanged. 130a7e14dcfSSatish Balay c 131a7e14dcfSSatish Balay c a is a double precision array of dimension (lda,n). 132a7e14dcfSSatish Balay c On entry the full upper triangle of a must contain the 133a7e14dcfSSatish Balay c full upper triangle of the symmetric matrix A. 134a7e14dcfSSatish Balay c On exit the array contains the matrix A. 135a7e14dcfSSatish Balay c 136a7e14dcfSSatish Balay c lda is an integer variable. 137a7e14dcfSSatish Balay c On entry lda is the leading dimension of the array a. 138a7e14dcfSSatish Balay c On exit lda is unchanged. 139a7e14dcfSSatish Balay c 140a7e14dcfSSatish Balay c b is an double precision array of dimension n. 141a7e14dcfSSatish Balay c On entry b specifies the linear term in the quadratic. 142a7e14dcfSSatish Balay c On exit b is unchanged. 143a7e14dcfSSatish Balay c 144a7e14dcfSSatish Balay c delta is a double precision variable. 145a7e14dcfSSatish Balay c On entry delta is a bound on the Euclidean norm of x. 146a7e14dcfSSatish Balay c On exit delta is unchanged. 147a7e14dcfSSatish Balay c 148a7e14dcfSSatish Balay c rtol is a double precision variable. 149a7e14dcfSSatish Balay c On entry rtol is the relative accuracy desired in the 150a7e14dcfSSatish Balay c solution. Convergence occurs if 151a7e14dcfSSatish Balay c 152a7e14dcfSSatish Balay c f(x) <= ((1 - rtol)**2)*f(xsol) 153a7e14dcfSSatish Balay c 154a7e14dcfSSatish Balay c On exit rtol is unchanged. 155a7e14dcfSSatish Balay c 156a7e14dcfSSatish Balay c atol is a double precision variable. 157a7e14dcfSSatish Balay c On entry atol is the absolute accuracy desired in the 158a7e14dcfSSatish Balay c solution. Convergence occurs when 159a7e14dcfSSatish Balay c 160a7e14dcfSSatish Balay c norm(x) <= (1 + rtol)*delta 161a7e14dcfSSatish Balay c 162a7e14dcfSSatish Balay c max(-f(x),-f(xsol)) <= atol 163a7e14dcfSSatish Balay c 164a7e14dcfSSatish Balay c On exit atol is unchanged. 165a7e14dcfSSatish Balay c 166a7e14dcfSSatish Balay c itmax is an integer variable. 167a7e14dcfSSatish Balay c On entry itmax specifies the maximum number of iterations. 168a7e14dcfSSatish Balay c On exit itmax is unchanged. 169a7e14dcfSSatish Balay c 170a7e14dcfSSatish Balay c par is a double precision variable. 171a7e14dcfSSatish Balay c On entry par is an initial estimate of the Lagrange 172a7e14dcfSSatish Balay c multiplier for the constraint norm(x) <= delta. 173a7e14dcfSSatish Balay c On exit par contains the final estimate of the multiplier. 174a7e14dcfSSatish Balay c 175a7e14dcfSSatish Balay c f is a double precision variable. 176a7e14dcfSSatish Balay c On entry f need not be specified. 177a7e14dcfSSatish Balay c On exit f is set to f(x) at the output x. 178a7e14dcfSSatish Balay c 179a7e14dcfSSatish Balay c x is a double precision array of dimension n. 180a7e14dcfSSatish Balay c On entry x need not be specified. 181a7e14dcfSSatish Balay c On exit x is set to the final estimate of the solution. 182a7e14dcfSSatish Balay c 183a7e14dcfSSatish Balay c info is an integer variable. 184a7e14dcfSSatish Balay c On entry info need not be specified. 185a7e14dcfSSatish Balay c On exit info is set as follows: 186a7e14dcfSSatish Balay c 187a7e14dcfSSatish Balay c info = 1 The function value f(x) has the relative 188a7e14dcfSSatish Balay c accuracy specified by rtol. 189a7e14dcfSSatish Balay c 190a7e14dcfSSatish Balay c info = 2 The function value f(x) has the absolute 191a7e14dcfSSatish Balay c accuracy specified by atol. 192a7e14dcfSSatish Balay c 193a7e14dcfSSatish Balay c info = 3 Rounding errors prevent further progress. 194a7e14dcfSSatish Balay c On exit x is the best available approximation. 195a7e14dcfSSatish Balay c 196a7e14dcfSSatish Balay c info = 4 Failure to converge after itmax iterations. 197a7e14dcfSSatish Balay c On exit x is the best available approximation. 198a7e14dcfSSatish Balay c 199a7e14dcfSSatish Balay c z is a double precision work array of dimension n. 200a7e14dcfSSatish Balay c 201a7e14dcfSSatish Balay c wa1 is a double precision work array of dimension n. 202a7e14dcfSSatish Balay c 203a7e14dcfSSatish Balay c wa2 is a double precision work array of dimension n. 204a7e14dcfSSatish Balay c 205a7e14dcfSSatish Balay c Subprograms called 206a7e14dcfSSatish Balay c 207a7e14dcfSSatish Balay c MINPACK-2 ...... destsv 208a7e14dcfSSatish Balay c 209a7e14dcfSSatish Balay c LAPACK ......... dpotrf 210a7e14dcfSSatish Balay c 211a7e14dcfSSatish Balay c Level 1 BLAS ... daxpy, dcopy, ddot, dnrm2, dscal 212a7e14dcfSSatish Balay c 213a7e14dcfSSatish Balay c Level 2 BLAS ... dtrmv, dtrsv 214a7e14dcfSSatish Balay c 215a7e14dcfSSatish Balay c MINPACK-2 Project. October 1993. 216a7e14dcfSSatish Balay c Argonne National Laboratory and University of Minnesota. 217a7e14dcfSSatish Balay c Brett M. Averick, Richard Carter, and Jorge J. More' 218a7e14dcfSSatish Balay c 219a7e14dcfSSatish Balay c *********** 220a7e14dcfSSatish Balay */ 221a7e14dcfSSatish Balay PetscErrorCode gqt(PetscInt n, PetscReal *a, PetscInt lda, PetscReal *b, 222a7e14dcfSSatish Balay PetscReal delta, PetscReal rtol, PetscReal atol, 223a7e14dcfSSatish Balay PetscInt itmax, PetscReal *retpar, PetscReal *retf, 224a7e14dcfSSatish Balay PetscReal *x, PetscInt *retinfo, PetscInt *retits, 225a7e14dcfSSatish Balay PetscReal *z, PetscReal *wa1, PetscReal *wa2) 226a7e14dcfSSatish Balay { 227a7e14dcfSSatish Balay PetscErrorCode ierr; 228a7e14dcfSSatish Balay PetscReal f=0.0,p001=0.001,p5=0.5,minusone=-1,delta2=delta*delta; 229a7e14dcfSSatish Balay PetscInt iter, j, rednc,info; 230a7e14dcfSSatish Balay PetscBLASInt indef; 2311cfd2cc8SBarry Smith PetscBLASInt blas1=1, blasn, iblas, blaslda, blasldap1, blasinfo; 2326c23d075SBarry Smith PetscReal alpha, anorm, bnorm, parc, parf, parl, pars, par=*retpar,paru, prod, rxnorm, rznorm=0.0, temp, xnorm; 233a7e14dcfSSatish Balay 234a7e14dcfSSatish Balay PetscFunctionBegin; 2351cfd2cc8SBarry Smith ierr = PetscBLASIntCast(n,&blasn);CHKERRQ(ierr); 2361cfd2cc8SBarry Smith ierr = PetscBLASIntCast(lda,&blaslda);CHKERRQ(ierr); 2371cfd2cc8SBarry Smith ierr = PetscBLASIntCast(lda+1,&blasldap1);CHKERRQ(ierr); 238a7e14dcfSSatish Balay parf = 0.0; 239a7e14dcfSSatish Balay xnorm = 0.0; 240a7e14dcfSSatish Balay rxnorm = 0.0; 241a7e14dcfSSatish Balay rednc = 0; 242a7e14dcfSSatish Balay for (j=0; j<n; j++) { 243a7e14dcfSSatish Balay x[j] = 0.0; 244a7e14dcfSSatish Balay z[j] = 0.0; 245a7e14dcfSSatish Balay } 246a7e14dcfSSatish Balay 247a7e14dcfSSatish Balay /* Copy the diagonal and save A in its lower triangle */ 2480cbffdbaSBarry Smith PetscStackCallBLAS("BLAScopy",BLAScopy_(&blasn,a,&blasldap1, wa1, &blas1)); 249a7e14dcfSSatish Balay for (j=0;j<n-1;j++) { 2501cfd2cc8SBarry Smith ierr = PetscBLASIntCast(n - j - 1,&iblas);CHKERRQ(ierr); 2510cbffdbaSBarry Smith PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,&a[j + lda*(j+1)], &blaslda, &a[j+1 + lda*j], &blas1)); 252a7e14dcfSSatish Balay } 253a7e14dcfSSatish Balay 254a7e14dcfSSatish Balay /* Calculate the l1-norm of A, the Gershgorin row sums, and the 255a7e14dcfSSatish Balay l2-norm of b */ 256a7e14dcfSSatish Balay anorm = 0.0; 257a7e14dcfSSatish Balay for (j=0;j<n;j++) { 25873cf7048SBarry Smith PetscStackCallBLAS("BLASasum",wa2[j] = BLASasum_(&blasn, &a[0 + lda*j], &blas1));CHKMEMQ; 259a7e14dcfSSatish Balay anorm = PetscMax(anorm,wa2[j]); 260a7e14dcfSSatish Balay } 261a7e14dcfSSatish Balay for (j=0;j<n;j++) { 262a7e14dcfSSatish Balay wa2[j] = wa2[j] - PetscAbs(wa1[j]); 263a7e14dcfSSatish Balay } 26473cf7048SBarry Smith PetscStackCallBLAS("BLASnrm2",bnorm = BLASnrm2_(&blasn,b,&blas1));CHKMEMQ; 265a7e14dcfSSatish Balay /* Calculate a lower bound, pars, for the domain of the problem. 266a7e14dcfSSatish Balay Also calculate an upper bound, paru, and a lower bound, parl, 267a7e14dcfSSatish Balay for the Lagrange multiplier. */ 268a7e14dcfSSatish Balay pars = parl = paru = -anorm; 269a7e14dcfSSatish Balay for (j=0;j<n;j++) { 270a7e14dcfSSatish Balay pars = PetscMax(pars, -wa1[j]); 271a7e14dcfSSatish Balay parl = PetscMax(parl, wa1[j] + wa2[j]); 272a7e14dcfSSatish Balay paru = PetscMax(paru, -wa1[j] + wa2[j]); 273a7e14dcfSSatish Balay } 274a7e14dcfSSatish Balay parl = PetscMax(bnorm/delta - parl,pars); 275a7e14dcfSSatish Balay parl = PetscMax(0.0,parl); 276a7e14dcfSSatish Balay paru = PetscMax(0.0, bnorm/delta + paru); 277a7e14dcfSSatish Balay 278a7e14dcfSSatish Balay /* If the input par lies outside of the interval (parl, paru), 279a7e14dcfSSatish Balay set par to the closer endpoint. */ 280a7e14dcfSSatish Balay 281a7e14dcfSSatish Balay par = PetscMax(par,parl); 282a7e14dcfSSatish Balay par = PetscMin(par,paru); 283a7e14dcfSSatish Balay 284a7e14dcfSSatish Balay /* Special case: parl == paru */ 285a7e14dcfSSatish Balay paru = PetscMax(paru, (1.0 + rtol)*parl); 286a7e14dcfSSatish Balay 287a7e14dcfSSatish Balay /* Beginning of an iteration */ 288a7e14dcfSSatish Balay 289a7e14dcfSSatish Balay info = 0; 290a7e14dcfSSatish Balay for (iter=1;iter<=itmax;iter++) { 291a7e14dcfSSatish Balay /* Safeguard par */ 292a7e14dcfSSatish Balay if (par <= pars && paru > 0) { 293a7e14dcfSSatish Balay par = PetscMax(p001, PetscSqrtScalar(parl/paru)) * paru; 294a7e14dcfSSatish Balay } 295a7e14dcfSSatish Balay 2961cfd2cc8SBarry Smith /* Copy the lower triangle of A into its upper triangle and compute A + par*I */ 297a7e14dcfSSatish Balay 298a7e14dcfSSatish Balay for (j=0;j<n-1;j++) { 2991cfd2cc8SBarry Smith ierr = PetscBLASIntCast(n - j - 1,&iblas);CHKERRQ(ierr); 3000cbffdbaSBarry Smith PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,&a[j+1 + j*lda], &blas1,&a[j + (j+1)*lda], &blaslda)); 301a7e14dcfSSatish Balay } 302a7e14dcfSSatish Balay for (j=0;j<n;j++) { 303a7e14dcfSSatish Balay a[j + j*lda] = wa1[j] + par; 304a7e14dcfSSatish Balay } 305a7e14dcfSSatish Balay 3061cfd2cc8SBarry Smith /* Attempt the Cholesky factorization of A without referencing the lower triangular part. */ 3070cbffdbaSBarry Smith PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&blasn,a,&blaslda,&indef)); 308a7e14dcfSSatish Balay 309a7e14dcfSSatish Balay /* Case 1: A + par*I is pos. def. */ 310a7e14dcfSSatish Balay if (indef == 0) { 311a7e14dcfSSatish Balay 3121cfd2cc8SBarry Smith /* Compute an approximate solution x and save the last value of par with A + par*I pos. def. */ 313a7e14dcfSSatish Balay 314a7e14dcfSSatish Balay parf = par; 3150cbffdbaSBarry Smith PetscStackCallBLAS("BLAScopy",BLAScopy_(&blasn, b, &blas1, wa2, &blas1)); 3160cbffdbaSBarry Smith PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&blasn,&blas1,a,&blaslda,wa2,&blasn,&blasinfo)); 317*3c859ba3SBarry Smith PetscCheck(!blasinfo,PETSC_COMM_SELF,PETSC_ERR_LIB,"LAPACKtrtrs() returned info %d",blasinfo); 31873cf7048SBarry Smith PetscStackCallBLAS("BLASnrm2",rxnorm = BLASnrm2_(&blasn, wa2, &blas1)); 3190cbffdbaSBarry Smith PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","N","N",&blasn,&blas1,a,&blaslda,wa2,&blasn,&blasinfo)); 320*3c859ba3SBarry Smith PetscCheck(!blasinfo,PETSC_COMM_SELF,PETSC_ERR_LIB,"LAPACKtrtrs() returned info %d",blasinfo); 321e81852a0SSatish Balay 3220cbffdbaSBarry Smith PetscStackCallBLAS("BLAScopy",BLAScopy_(&blasn, wa2, &blas1, x, &blas1)); 3230cbffdbaSBarry Smith PetscStackCallBLAS("BLASscal",BLASscal_(&blasn, &minusone, x, &blas1)); 32473cf7048SBarry Smith PetscStackCallBLAS("BLASnrm2",xnorm = BLASnrm2_(&blasn, x, &blas1));CHKMEMQ; 325a7e14dcfSSatish Balay 326a7e14dcfSSatish Balay /* Test for convergence */ 3271cfd2cc8SBarry Smith if (PetscAbs(xnorm - delta) <= rtol*delta || (par == 0 && xnorm <= (1.0+rtol)*delta)) { 328a7e14dcfSSatish Balay info = 1; 329a7e14dcfSSatish Balay } 330a7e14dcfSSatish Balay 3311cfd2cc8SBarry Smith /* Compute a direction of negative curvature and use this information to improve pars. */ 33273cf7048SBarry Smith ierr = estsv(n,a,lda,&rznorm,z);CHKERRQ(ierr);CHKMEMQ; 333a7e14dcfSSatish Balay pars = PetscMax(pars, par-rznorm*rznorm); 334a7e14dcfSSatish Balay 3351cfd2cc8SBarry Smith /* Compute a negative curvature solution of the form x + alpha*z, where norm(x+alpha*z)==delta */ 336a7e14dcfSSatish Balay 337a7e14dcfSSatish Balay rednc = 0; 338a7e14dcfSSatish Balay if (xnorm < delta) { 339a7e14dcfSSatish Balay /* Compute alpha */ 34073cf7048SBarry Smith PetscStackCallBLAS("BLASdot",prod = BLASdot_(&blasn, z, &blas1, x, &blas1)/delta); 341a7e14dcfSSatish Balay temp = (delta - xnorm)*((delta + xnorm)/delta); 342a7e14dcfSSatish Balay alpha = temp/(PetscAbs(prod) + PetscSqrtScalar(prod*prod + temp/delta)); 3436c23d075SBarry Smith if (prod >= 0) alpha = PetscAbs(alpha); 3446c23d075SBarry Smith else alpha =-PetscAbs(alpha); 345a7e14dcfSSatish Balay 3461cfd2cc8SBarry Smith /* Test to decide if the negative curvature step produces a larger reduction than with z=0 */ 347a7e14dcfSSatish Balay rznorm = PetscAbs(alpha) * rznorm; 348a7e14dcfSSatish Balay if ((rznorm*rznorm + par*xnorm*xnorm)/(delta2) <= par) { 349a7e14dcfSSatish Balay rednc = 1; 350a7e14dcfSSatish Balay } 351a7e14dcfSSatish Balay /* Test for convergence */ 3526c23d075SBarry Smith if (p5 * rznorm*rznorm / delta2 <= rtol*(1.0-p5*rtol)*(par + rxnorm*rxnorm/delta2)) { 353a7e14dcfSSatish Balay info = 1; 3546c23d075SBarry Smith } else if (info == 0 && (p5*(par + rxnorm*rxnorm/delta2) <= atol/delta2)) { 355a7e14dcfSSatish Balay info = 2; 356a7e14dcfSSatish Balay } 357a7e14dcfSSatish Balay } 358a7e14dcfSSatish Balay 359a7e14dcfSSatish Balay /* Compute the Newton correction parc to par. */ 360a7e14dcfSSatish Balay if (xnorm == 0) { 361a7e14dcfSSatish Balay parc = -par; 362a7e14dcfSSatish Balay } else { 3630cbffdbaSBarry Smith PetscStackCallBLAS("BLAScopy",BLAScopy_(&blasn, x, &blas1, wa2, &blas1)); 364a7e14dcfSSatish Balay temp = 1.0/xnorm; 3650cbffdbaSBarry Smith PetscStackCallBLAS("BLASscal",BLASscal_(&blasn, &temp, wa2, &blas1)); 3660cbffdbaSBarry Smith PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo)); 367*3c859ba3SBarry Smith PetscCheck(!blasinfo,PETSC_COMM_SELF,PETSC_ERR_LIB,"LAPACKtrtrs() returned info %d",blasinfo); 36873cf7048SBarry Smith PetscStackCallBLAS("BLASnrm2",temp = BLASnrm2_(&blasn, wa2, &blas1)); 369a7e14dcfSSatish Balay parc = (xnorm - delta)/(delta*temp*temp); 370a7e14dcfSSatish Balay } 371a7e14dcfSSatish Balay 372a7e14dcfSSatish Balay /* update parl or paru */ 373a7e14dcfSSatish Balay if (xnorm > delta) { 374a7e14dcfSSatish Balay parl = PetscMax(parl, par); 375a7e14dcfSSatish Balay } else if (xnorm < delta) { 376a7e14dcfSSatish Balay paru = PetscMin(paru, par); 377a7e14dcfSSatish Balay } 378a7e14dcfSSatish Balay } else { 379a7e14dcfSSatish Balay /* Case 2: A + par*I is not pos. def. */ 380a7e14dcfSSatish Balay 3811cfd2cc8SBarry Smith /* Use the rank information from the Cholesky decomposition to update par. */ 382a7e14dcfSSatish Balay 383a7e14dcfSSatish Balay if (indef > 1) { 384a7e14dcfSSatish Balay /* Restore column indef to A + par*I. */ 385a7e14dcfSSatish Balay iblas = indef - 1; 3860cbffdbaSBarry Smith PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,&a[indef-1 + 0*lda],&blaslda,&a[0 + (indef-1)*lda],&blas1)); 387a7e14dcfSSatish Balay a[indef-1 + (indef-1)*lda] = wa1[indef-1] + par; 388a7e14dcfSSatish Balay 389a7e14dcfSSatish Balay /* compute parc. */ 3900cbffdbaSBarry Smith PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,&a[0 + (indef-1)*lda], &blas1, wa2, &blas1)); 3910cbffdbaSBarry Smith PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&iblas,&blas1,a,&blaslda,wa2,&blasn,&blasinfo)); 392*3c859ba3SBarry Smith PetscCheck(!blasinfo,PETSC_COMM_SELF,PETSC_ERR_LIB,"LAPACKtrtrs() returned info %d",blasinfo); 3930cbffdbaSBarry Smith PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,wa2,&blas1,&a[0 + (indef-1)*lda],&blas1)); 39473cf7048SBarry Smith PetscStackCallBLAS("BLASnrm2",temp = BLASnrm2_(&iblas,&a[0 + (indef-1)*lda],&blas1));CHKMEMQ; 395a7e14dcfSSatish Balay a[indef-1 + (indef-1)*lda] -= temp*temp; 396e785d365SKarl Rupp PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","N","N",&iblas,&blas1,a,&blaslda,wa2,&blasn,&blasinfo)); 397*3c859ba3SBarry Smith PetscCheck(!blasinfo,PETSC_COMM_SELF,PETSC_ERR_LIB,"LAPACKtrtrs() returned info %d",blasinfo); 398a7e14dcfSSatish Balay } 399a7e14dcfSSatish Balay 400a7e14dcfSSatish Balay wa2[indef-1] = -1.0; 401a7e14dcfSSatish Balay iblas = indef; 40273cf7048SBarry Smith PetscStackCallBLAS("BLASnrm2",temp = BLASnrm2_(&iblas,wa2,&blas1)); 403a7e14dcfSSatish Balay parc = - a[indef-1 + (indef-1)*lda]/(temp*temp); 404a7e14dcfSSatish Balay pars = PetscMax(pars,par+parc); 405a7e14dcfSSatish Balay 406a7e14dcfSSatish Balay /* If necessary, increase paru slightly. 407a7e14dcfSSatish Balay This is needed because in some exceptional situations 408a7e14dcfSSatish Balay paru is the optimal value of par. */ 409a7e14dcfSSatish Balay 410a7e14dcfSSatish Balay paru = PetscMax(paru, (1.0+rtol)*pars); 411a7e14dcfSSatish Balay } 412a7e14dcfSSatish Balay 413a7e14dcfSSatish Balay /* Use pars to update parl */ 414a7e14dcfSSatish Balay parl = PetscMax(parl,pars); 415a7e14dcfSSatish Balay 416e4cb33bbSBarry Smith /* Test for converged. */ 417a7e14dcfSSatish Balay if (info == 0) { 418a7e14dcfSSatish Balay if (iter == itmax) info=4; 419a7e14dcfSSatish Balay if (paru <= (1.0+p5*rtol)*pars) info=3; 420a7e14dcfSSatish Balay if (paru == 0.0) info = 2; 421a7e14dcfSSatish Balay } 422a7e14dcfSSatish Balay 423a7e14dcfSSatish Balay /* If exiting, store the best approximation and restore 424a7e14dcfSSatish Balay the upper triangle of A. */ 425a7e14dcfSSatish Balay 426a7e14dcfSSatish Balay if (info != 0) { 427a7e14dcfSSatish Balay /* Compute the best current estimates for x and f. */ 428a7e14dcfSSatish Balay par = parf; 429a7e14dcfSSatish Balay f = -p5 * (rxnorm*rxnorm + par*xnorm*xnorm); 430a7e14dcfSSatish Balay if (rednc) { 431a7e14dcfSSatish Balay f = -p5 * (rxnorm*rxnorm + par*delta*delta - rznorm*rznorm); 4320cbffdbaSBarry Smith PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&blasn, &alpha, z, &blas1, x, &blas1)); 433a7e14dcfSSatish Balay } 434a7e14dcfSSatish Balay /* Restore the upper triangle of A */ 435a7e14dcfSSatish Balay for (j = 0; j<n; j++) { 4361cfd2cc8SBarry Smith ierr = PetscBLASIntCast(n - j - 1,&iblas);CHKERRQ(ierr); 4370cbffdbaSBarry Smith PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,&a[j+1 + j*lda],&blas1, &a[j + (j+1)*lda],&blaslda)); 438a7e14dcfSSatish Balay } 4391cfd2cc8SBarry Smith ierr = PetscBLASIntCast(lda+1,&iblas);CHKERRQ(ierr); 4400cbffdbaSBarry Smith PetscStackCallBLAS("BLAScopy",BLAScopy_(&blasn,wa1,&blas1,a,&iblas)); 441a7e14dcfSSatish Balay break; 442a7e14dcfSSatish Balay } 443a7e14dcfSSatish Balay par = PetscMax(parl,par+parc); 444a7e14dcfSSatish Balay } 445a7e14dcfSSatish Balay *retpar = par; 446a7e14dcfSSatish Balay *retf = f; 447a7e14dcfSSatish Balay *retinfo = info; 448a7e14dcfSSatish Balay *retits = iter; 449a7e14dcfSSatish Balay CHKMEMQ; 450a7e14dcfSSatish Balay PetscFunctionReturn(0); 451a7e14dcfSSatish Balay } 452