1560360afSLisandro Dalcin #include <petscsys.h> 2aaa7dc30SBarry Smith #include <petscblaslapack.h> 3a7e14dcfSSatish Balay 4d71ae5a4SJacob Faibussowitsch static PetscErrorCode estsv(PetscInt n, PetscReal *r, PetscInt ldr, PetscReal *svmin, PetscReal *z) 5d71ae5a4SJacob Faibussowitsch { 61cfd2cc8SBarry Smith PetscBLASInt blas1 = 1, blasn, blasnmi, blasj, blasldr; 7a7e14dcfSSatish Balay PetscInt i, j; 8a7e14dcfSSatish Balay PetscReal e, temp, w, wm, ynorm, znorm, s, sm; 96c23d075SBarry Smith 10a7e14dcfSSatish Balay PetscFunctionBegin; 119566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(n, &blasn)); 129566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(ldr, &blasldr)); 13ad540459SPierre Jolivet for (i = 0; i < n; i++) z[i] = 0.0; 14a7e14dcfSSatish Balay e = PetscAbs(r[0]); 15a7e14dcfSSatish Balay if (e == 0.0) { 16a7e14dcfSSatish Balay *svmin = 0.0; 17a7e14dcfSSatish Balay z[0] = 1.0; 18a7e14dcfSSatish Balay } else { 19a7e14dcfSSatish Balay /* Solve R'*y = e */ 20a7e14dcfSSatish Balay for (i = 0; i < n; i++) { 21a7e14dcfSSatish Balay /* Scale y. The scaling factor (0.01) reduces the number of scalings */ 226c23d075SBarry Smith if (z[i] >= 0.0) e = -PetscAbs(e); 236c23d075SBarry Smith else e = PetscAbs(e); 24a7e14dcfSSatish Balay 25a7e14dcfSSatish Balay if (PetscAbs(e - z[i]) > PetscAbs(r[i + ldr * i])) { 266c23d075SBarry Smith temp = PetscMin(0.01, PetscAbs(r[i + ldr * i])) / PetscAbs(e - z[i]); 27792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&blasn, &temp, z, &blas1)); 28a7e14dcfSSatish Balay e = temp * e; 29a7e14dcfSSatish Balay } 30a7e14dcfSSatish Balay 31a7e14dcfSSatish Balay /* Determine the two possible choices of y[i] */ 326c23d075SBarry Smith if (r[i + ldr * i] == 0.0) { 33a7e14dcfSSatish Balay w = wm = 1.0; 346c23d075SBarry Smith } else { 35a7e14dcfSSatish Balay w = (e - z[i]) / r[i + ldr * i]; 36a7e14dcfSSatish Balay wm = -(e + z[i]) / r[i + ldr * i]; 37a7e14dcfSSatish Balay } 38a7e14dcfSSatish Balay 39a7e14dcfSSatish Balay /* Chose y[i] based on the predicted value of y[j] for j>i */ 40a7e14dcfSSatish Balay s = PetscAbs(e - z[i]); 41a7e14dcfSSatish Balay sm = PetscAbs(e + z[i]); 42ad540459SPierre Jolivet for (j = i + 1; j < n; j++) sm += PetscAbs(z[j] + wm * r[i + ldr * j]); 43a7e14dcfSSatish Balay if (i < n - 1) { 449566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(n - i - 1, &blasnmi)); 45792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasnmi, &w, &r[i + ldr * (i + 1)], &blasldr, &z[i + 1], &blas1)); 46792fecdfSBarry Smith PetscCallBLAS("BLASasum", s += BLASasum_(&blasnmi, &z[i + 1], &blas1)); 47a7e14dcfSSatish Balay } 48a7e14dcfSSatish Balay if (s < sm) { 49a7e14dcfSSatish Balay temp = wm - w; 50a7e14dcfSSatish Balay w = wm; 5148a46eb9SPierre Jolivet if (i < n - 1) PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasnmi, &temp, &r[i + ldr * (i + 1)], &blasldr, &z[i + 1], &blas1)); 52a7e14dcfSSatish Balay } 53a7e14dcfSSatish Balay z[i] = w; 54a7e14dcfSSatish Balay } 55a7e14dcfSSatish Balay 56792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", ynorm = BLASnrm2_(&blasn, z, &blas1)); 57a7e14dcfSSatish Balay 58a7e14dcfSSatish Balay /* Solve R*z = y */ 59a7e14dcfSSatish Balay for (j = n - 1; j >= 0; j--) { 60a7e14dcfSSatish Balay /* Scale z */ 61a7e14dcfSSatish Balay if (PetscAbs(z[j]) > PetscAbs(r[j + ldr * j])) { 62a7e14dcfSSatish Balay temp = PetscMin(0.01, PetscAbs(r[j + ldr * j] / z[j])); 63792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&blasn, &temp, z, &blas1)); 64a7e14dcfSSatish Balay ynorm *= temp; 65a7e14dcfSSatish Balay } 66a7e14dcfSSatish Balay if (r[j + ldr * j] == 0) { 67a7e14dcfSSatish Balay z[j] = 1.0; 68a7e14dcfSSatish Balay } else { 69a7e14dcfSSatish Balay z[j] = z[j] / r[j + ldr * j]; 70a7e14dcfSSatish Balay } 71a7e14dcfSSatish Balay temp = -z[j]; 729566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(j, &blasj)); 73792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasj, &temp, &r[0 + ldr * j], &blas1, z, &blas1)); 74a7e14dcfSSatish Balay } 75a7e14dcfSSatish Balay 76a7e14dcfSSatish Balay /* Compute svmin and normalize z */ 77792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", znorm = 1.0 / BLASnrm2_(&blasn, z, &blas1)); 78a7e14dcfSSatish Balay *svmin = ynorm * znorm; 79792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&blasn, &znorm, z, &blas1)); 80a7e14dcfSSatish Balay } 81*3ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 82a7e14dcfSSatish Balay } 83a7e14dcfSSatish Balay 84a7e14dcfSSatish Balay /* 85a7e14dcfSSatish Balay c *********** 86a7e14dcfSSatish Balay c 87691b26d3SBarry Smith c Subroutine gqt 88a7e14dcfSSatish Balay c 89a7e14dcfSSatish Balay c Given an n by n symmetric matrix A, an n-vector b, and a 90a7e14dcfSSatish Balay c positive number delta, this subroutine determines a vector 91a7e14dcfSSatish Balay c x which approximately minimizes the quadratic function 92a7e14dcfSSatish Balay c 93a7e14dcfSSatish Balay c f(x) = (1/2)*x'*A*x + b'*x 94a7e14dcfSSatish Balay c 95a7e14dcfSSatish Balay c subject to the Euclidean norm constraint 96a7e14dcfSSatish Balay c 97a7e14dcfSSatish Balay c norm(x) <= delta. 98a7e14dcfSSatish Balay c 99a7e14dcfSSatish Balay c This subroutine computes an approximation x and a Lagrange 100a7e14dcfSSatish Balay c multiplier par such that either par is zero and 101a7e14dcfSSatish Balay c 102a7e14dcfSSatish Balay c norm(x) <= (1+rtol)*delta, 103a7e14dcfSSatish Balay c 104a7e14dcfSSatish Balay c or par is positive and 105a7e14dcfSSatish Balay c 106a7e14dcfSSatish Balay c abs(norm(x) - delta) <= rtol*delta. 107a7e14dcfSSatish Balay c 108a7e14dcfSSatish Balay c If xsol is the solution to the problem, the approximation x 109a7e14dcfSSatish Balay c satisfies 110a7e14dcfSSatish Balay c 111a7e14dcfSSatish Balay c f(x) <= ((1 - rtol)**2)*f(xsol) 112a7e14dcfSSatish Balay c 113a7e14dcfSSatish Balay c The subroutine statement is 114a7e14dcfSSatish Balay c 115691b26d3SBarry Smith c subroutine gqt(n,a,lda,b,delta,rtol,atol,itmax, 116a7e14dcfSSatish Balay c par,f,x,info,z,wa1,wa2) 117a7e14dcfSSatish Balay c 118a7e14dcfSSatish Balay c where 119a7e14dcfSSatish Balay c 120a7e14dcfSSatish Balay c n is an integer variable. 121a7e14dcfSSatish Balay c On entry n is the order of A. 122a7e14dcfSSatish Balay c On exit n is unchanged. 123a7e14dcfSSatish Balay c 124a7e14dcfSSatish Balay c a is a double precision array of dimension (lda,n). 125a7e14dcfSSatish Balay c On entry the full upper triangle of a must contain the 126a7e14dcfSSatish Balay c full upper triangle of the symmetric matrix A. 127a7e14dcfSSatish Balay c On exit the array contains the matrix A. 128a7e14dcfSSatish Balay c 129a7e14dcfSSatish Balay c lda is an integer variable. 130a7e14dcfSSatish Balay c On entry lda is the leading dimension of the array a. 131a7e14dcfSSatish Balay c On exit lda is unchanged. 132a7e14dcfSSatish Balay c 133a7e14dcfSSatish Balay c b is an double precision array of dimension n. 134a7e14dcfSSatish Balay c On entry b specifies the linear term in the quadratic. 135a7e14dcfSSatish Balay c On exit b is unchanged. 136a7e14dcfSSatish Balay c 137a7e14dcfSSatish Balay c delta is a double precision variable. 138a7e14dcfSSatish Balay c On entry delta is a bound on the Euclidean norm of x. 139a7e14dcfSSatish Balay c On exit delta is unchanged. 140a7e14dcfSSatish Balay c 141a7e14dcfSSatish Balay c rtol is a double precision variable. 142a7e14dcfSSatish Balay c On entry rtol is the relative accuracy desired in the 143a7e14dcfSSatish Balay c solution. Convergence occurs if 144a7e14dcfSSatish Balay c 145a7e14dcfSSatish Balay c f(x) <= ((1 - rtol)**2)*f(xsol) 146a7e14dcfSSatish Balay c 147a7e14dcfSSatish Balay c On exit rtol is unchanged. 148a7e14dcfSSatish Balay c 149a7e14dcfSSatish Balay c atol is a double precision variable. 150a7e14dcfSSatish Balay c On entry atol is the absolute accuracy desired in the 151a7e14dcfSSatish Balay c solution. Convergence occurs when 152a7e14dcfSSatish Balay c 153a7e14dcfSSatish Balay c norm(x) <= (1 + rtol)*delta 154a7e14dcfSSatish Balay c 155a7e14dcfSSatish Balay c max(-f(x),-f(xsol)) <= atol 156a7e14dcfSSatish Balay c 157a7e14dcfSSatish Balay c On exit atol is unchanged. 158a7e14dcfSSatish Balay c 159a7e14dcfSSatish Balay c itmax is an integer variable. 160a7e14dcfSSatish Balay c On entry itmax specifies the maximum number of iterations. 161a7e14dcfSSatish Balay c On exit itmax is unchanged. 162a7e14dcfSSatish Balay c 163a7e14dcfSSatish Balay c par is a double precision variable. 164a7e14dcfSSatish Balay c On entry par is an initial estimate of the Lagrange 165a7e14dcfSSatish Balay c multiplier for the constraint norm(x) <= delta. 166a7e14dcfSSatish Balay c On exit par contains the final estimate of the multiplier. 167a7e14dcfSSatish Balay c 168a7e14dcfSSatish Balay c f is a double precision variable. 169a7e14dcfSSatish Balay c On entry f need not be specified. 170a7e14dcfSSatish Balay c On exit f is set to f(x) at the output x. 171a7e14dcfSSatish Balay c 172a7e14dcfSSatish Balay c x is a double precision array of dimension n. 173a7e14dcfSSatish Balay c On entry x need not be specified. 174a7e14dcfSSatish Balay c On exit x is set to the final estimate of the solution. 175a7e14dcfSSatish Balay c 176a7e14dcfSSatish Balay c info is an integer variable. 177a7e14dcfSSatish Balay c On entry info need not be specified. 178a7e14dcfSSatish Balay c On exit info is set as follows: 179a7e14dcfSSatish Balay c 180a7e14dcfSSatish Balay c info = 1 The function value f(x) has the relative 181a7e14dcfSSatish Balay c accuracy specified by rtol. 182a7e14dcfSSatish Balay c 183a7e14dcfSSatish Balay c info = 2 The function value f(x) has the absolute 184a7e14dcfSSatish Balay c accuracy specified by atol. 185a7e14dcfSSatish Balay c 186a7e14dcfSSatish Balay c info = 3 Rounding errors prevent further progress. 187a7e14dcfSSatish Balay c On exit x is the best available approximation. 188a7e14dcfSSatish Balay c 189a7e14dcfSSatish Balay c info = 4 Failure to converge after itmax iterations. 190a7e14dcfSSatish Balay c On exit x is the best available approximation. 191a7e14dcfSSatish Balay c 192a7e14dcfSSatish Balay c z is a double precision work array of dimension n. 193a7e14dcfSSatish Balay c 194a7e14dcfSSatish Balay c wa1 is a double precision work array of dimension n. 195a7e14dcfSSatish Balay c 196a7e14dcfSSatish Balay c wa2 is a double precision work array of dimension n. 197a7e14dcfSSatish Balay c 198a7e14dcfSSatish Balay c Subprograms called 199a7e14dcfSSatish Balay c 200a7e14dcfSSatish Balay c MINPACK-2 ...... destsv 201a7e14dcfSSatish Balay c 202a7e14dcfSSatish Balay c LAPACK ......... dpotrf 203a7e14dcfSSatish Balay c 204a7e14dcfSSatish Balay c Level 1 BLAS ... daxpy, dcopy, ddot, dnrm2, dscal 205a7e14dcfSSatish Balay c 206a7e14dcfSSatish Balay c Level 2 BLAS ... dtrmv, dtrsv 207a7e14dcfSSatish Balay c 208a7e14dcfSSatish Balay c MINPACK-2 Project. October 1993. 209a7e14dcfSSatish Balay c Argonne National Laboratory and University of Minnesota. 210a7e14dcfSSatish Balay c Brett M. Averick, Richard Carter, and Jorge J. More' 211a7e14dcfSSatish Balay c 212a7e14dcfSSatish Balay c *********** 213a7e14dcfSSatish Balay */ 214d71ae5a4SJacob Faibussowitsch PetscErrorCode gqt(PetscInt n, PetscReal *a, PetscInt lda, PetscReal *b, PetscReal delta, PetscReal rtol, PetscReal atol, PetscInt itmax, PetscReal *retpar, PetscReal *retf, PetscReal *x, PetscInt *retinfo, PetscInt *retits, PetscReal *z, PetscReal *wa1, PetscReal *wa2) 215d71ae5a4SJacob Faibussowitsch { 216a7e14dcfSSatish Balay PetscReal f = 0.0, p001 = 0.001, p5 = 0.5, minusone = -1, delta2 = delta * delta; 217a7e14dcfSSatish Balay PetscInt iter, j, rednc, info; 218a7e14dcfSSatish Balay PetscBLASInt indef; 2191cfd2cc8SBarry Smith PetscBLASInt blas1 = 1, blasn, iblas, blaslda, blasldap1, blasinfo; 2206c23d075SBarry Smith PetscReal alpha, anorm, bnorm, parc, parf, parl, pars, par = *retpar, paru, prod, rxnorm, rznorm = 0.0, temp, xnorm; 221a7e14dcfSSatish Balay 222a7e14dcfSSatish Balay PetscFunctionBegin; 2239566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(n, &blasn)); 2249566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(lda, &blaslda)); 2259566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(lda + 1, &blasldap1)); 226a7e14dcfSSatish Balay parf = 0.0; 227a7e14dcfSSatish Balay xnorm = 0.0; 228a7e14dcfSSatish Balay rxnorm = 0.0; 229a7e14dcfSSatish Balay rednc = 0; 230a7e14dcfSSatish Balay for (j = 0; j < n; j++) { 231a7e14dcfSSatish Balay x[j] = 0.0; 232a7e14dcfSSatish Balay z[j] = 0.0; 233a7e14dcfSSatish Balay } 234a7e14dcfSSatish Balay 235a7e14dcfSSatish Balay /* Copy the diagonal and save A in its lower triangle */ 236792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, a, &blasldap1, wa1, &blas1)); 237a7e14dcfSSatish Balay for (j = 0; j < n - 1; j++) { 2389566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(n - j - 1, &iblas)); 239792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[j + lda * (j + 1)], &blaslda, &a[j + 1 + lda * j], &blas1)); 240a7e14dcfSSatish Balay } 241a7e14dcfSSatish Balay 242a7e14dcfSSatish Balay /* Calculate the l1-norm of A, the Gershgorin row sums, and the 243a7e14dcfSSatish Balay l2-norm of b */ 244a7e14dcfSSatish Balay anorm = 0.0; 245a7e14dcfSSatish Balay for (j = 0; j < n; j++) { 2469371c9d4SSatish Balay PetscCallBLAS("BLASasum", wa2[j] = BLASasum_(&blasn, &a[0 + lda * j], &blas1)); 2479371c9d4SSatish Balay CHKMEMQ; 248a7e14dcfSSatish Balay anorm = PetscMax(anorm, wa2[j]); 249a7e14dcfSSatish Balay } 250ad540459SPierre Jolivet for (j = 0; j < n; j++) wa2[j] = wa2[j] - PetscAbs(wa1[j]); 2519371c9d4SSatish Balay PetscCallBLAS("BLASnrm2", bnorm = BLASnrm2_(&blasn, b, &blas1)); 2529371c9d4SSatish Balay CHKMEMQ; 253a7e14dcfSSatish Balay /* Calculate a lower bound, pars, for the domain of the problem. 254a7e14dcfSSatish Balay Also calculate an upper bound, paru, and a lower bound, parl, 255a7e14dcfSSatish Balay for the Lagrange multiplier. */ 256a7e14dcfSSatish Balay pars = parl = paru = -anorm; 257a7e14dcfSSatish Balay for (j = 0; j < n; j++) { 258a7e14dcfSSatish Balay pars = PetscMax(pars, -wa1[j]); 259a7e14dcfSSatish Balay parl = PetscMax(parl, wa1[j] + wa2[j]); 260a7e14dcfSSatish Balay paru = PetscMax(paru, -wa1[j] + wa2[j]); 261a7e14dcfSSatish Balay } 262a7e14dcfSSatish Balay parl = PetscMax(bnorm / delta - parl, pars); 263a7e14dcfSSatish Balay parl = PetscMax(0.0, parl); 264a7e14dcfSSatish Balay paru = PetscMax(0.0, bnorm / delta + paru); 265a7e14dcfSSatish Balay 266a7e14dcfSSatish Balay /* If the input par lies outside of the interval (parl, paru), 267a7e14dcfSSatish Balay set par to the closer endpoint. */ 268a7e14dcfSSatish Balay 269a7e14dcfSSatish Balay par = PetscMax(par, parl); 270a7e14dcfSSatish Balay par = PetscMin(par, paru); 271a7e14dcfSSatish Balay 272a7e14dcfSSatish Balay /* Special case: parl == paru */ 273a7e14dcfSSatish Balay paru = PetscMax(paru, (1.0 + rtol) * parl); 274a7e14dcfSSatish Balay 275a7e14dcfSSatish Balay /* Beginning of an iteration */ 276a7e14dcfSSatish Balay 277a7e14dcfSSatish Balay info = 0; 278a7e14dcfSSatish Balay for (iter = 1; iter <= itmax; iter++) { 279a7e14dcfSSatish Balay /* Safeguard par */ 280ad540459SPierre Jolivet if (par <= pars && paru > 0) par = PetscMax(p001, PetscSqrtScalar(parl / paru)) * paru; 281a7e14dcfSSatish Balay 2821cfd2cc8SBarry Smith /* Copy the lower triangle of A into its upper triangle and compute A + par*I */ 283a7e14dcfSSatish Balay 284a7e14dcfSSatish Balay for (j = 0; j < n - 1; j++) { 2859566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(n - j - 1, &iblas)); 286792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[j + 1 + j * lda], &blas1, &a[j + (j + 1) * lda], &blaslda)); 287a7e14dcfSSatish Balay } 288ad540459SPierre Jolivet for (j = 0; j < n; j++) a[j + j * lda] = wa1[j] + par; 289a7e14dcfSSatish Balay 2901cfd2cc8SBarry Smith /* Attempt the Cholesky factorization of A without referencing the lower triangular part. */ 291792fecdfSBarry Smith PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("U", &blasn, a, &blaslda, &indef)); 292a7e14dcfSSatish Balay 293a7e14dcfSSatish Balay /* Case 1: A + par*I is pos. def. */ 294a7e14dcfSSatish Balay if (indef == 0) { 2951cfd2cc8SBarry Smith /* Compute an approximate solution x and save the last value of par with A + par*I pos. def. */ 296a7e14dcfSSatish Balay 297a7e14dcfSSatish Balay parf = par; 298792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, b, &blas1, wa2, &blas1)); 299792fecdfSBarry Smith PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo)); 30063a3b9bcSJacob Faibussowitsch PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo); 301792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", rxnorm = BLASnrm2_(&blasn, wa2, &blas1)); 302792fecdfSBarry Smith PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo)); 30363a3b9bcSJacob Faibussowitsch PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo); 304e81852a0SSatish Balay 305792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, wa2, &blas1, x, &blas1)); 306792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&blasn, &minusone, x, &blas1)); 3079371c9d4SSatish Balay PetscCallBLAS("BLASnrm2", xnorm = BLASnrm2_(&blasn, x, &blas1)); 3089371c9d4SSatish Balay CHKMEMQ; 309a7e14dcfSSatish Balay 310a7e14dcfSSatish Balay /* Test for convergence */ 311ad540459SPierre Jolivet if (PetscAbs(xnorm - delta) <= rtol * delta || (par == 0 && xnorm <= (1.0 + rtol) * delta)) info = 1; 312a7e14dcfSSatish Balay 3131cfd2cc8SBarry Smith /* Compute a direction of negative curvature and use this information to improve pars. */ 3149371c9d4SSatish Balay PetscCall(estsv(n, a, lda, &rznorm, z)); 3159371c9d4SSatish Balay CHKMEMQ; 316a7e14dcfSSatish Balay pars = PetscMax(pars, par - rznorm * rznorm); 317a7e14dcfSSatish Balay 3181cfd2cc8SBarry Smith /* Compute a negative curvature solution of the form x + alpha*z, where norm(x+alpha*z)==delta */ 319a7e14dcfSSatish Balay 320a7e14dcfSSatish Balay rednc = 0; 321a7e14dcfSSatish Balay if (xnorm < delta) { 322a7e14dcfSSatish Balay /* Compute alpha */ 323792fecdfSBarry Smith PetscCallBLAS("BLASdot", prod = BLASdot_(&blasn, z, &blas1, x, &blas1) / delta); 324a7e14dcfSSatish Balay temp = (delta - xnorm) * ((delta + xnorm) / delta); 325a7e14dcfSSatish Balay alpha = temp / (PetscAbs(prod) + PetscSqrtScalar(prod * prod + temp / delta)); 3266c23d075SBarry Smith if (prod >= 0) alpha = PetscAbs(alpha); 3276c23d075SBarry Smith else alpha = -PetscAbs(alpha); 328a7e14dcfSSatish Balay 3291cfd2cc8SBarry Smith /* Test to decide if the negative curvature step produces a larger reduction than with z=0 */ 330a7e14dcfSSatish Balay rznorm = PetscAbs(alpha) * rznorm; 331ad540459SPierre Jolivet if ((rznorm * rznorm + par * xnorm * xnorm) / (delta2) <= par) rednc = 1; 332a7e14dcfSSatish Balay /* Test for convergence */ 3336c23d075SBarry Smith if (p5 * rznorm * rznorm / delta2 <= rtol * (1.0 - p5 * rtol) * (par + rxnorm * rxnorm / delta2)) { 334a7e14dcfSSatish Balay info = 1; 3356c23d075SBarry Smith } else if (info == 0 && (p5 * (par + rxnorm * rxnorm / delta2) <= atol / delta2)) { 336a7e14dcfSSatish Balay info = 2; 337a7e14dcfSSatish Balay } 338a7e14dcfSSatish Balay } 339a7e14dcfSSatish Balay 340a7e14dcfSSatish Balay /* Compute the Newton correction parc to par. */ 341a7e14dcfSSatish Balay if (xnorm == 0) { 342a7e14dcfSSatish Balay parc = -par; 343a7e14dcfSSatish Balay } else { 344792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, x, &blas1, wa2, &blas1)); 345a7e14dcfSSatish Balay temp = 1.0 / xnorm; 346792fecdfSBarry Smith PetscCallBLAS("BLASscal", BLASscal_(&blasn, &temp, wa2, &blas1)); 347792fecdfSBarry Smith PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo)); 34863a3b9bcSJacob Faibussowitsch PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo); 349792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", temp = BLASnrm2_(&blasn, wa2, &blas1)); 350a7e14dcfSSatish Balay parc = (xnorm - delta) / (delta * temp * temp); 351a7e14dcfSSatish Balay } 352a7e14dcfSSatish Balay 353a7e14dcfSSatish Balay /* update parl or paru */ 354a7e14dcfSSatish Balay if (xnorm > delta) { 355a7e14dcfSSatish Balay parl = PetscMax(parl, par); 356a7e14dcfSSatish Balay } else if (xnorm < delta) { 357a7e14dcfSSatish Balay paru = PetscMin(paru, par); 358a7e14dcfSSatish Balay } 359a7e14dcfSSatish Balay } else { 360a7e14dcfSSatish Balay /* Case 2: A + par*I is not pos. def. */ 361a7e14dcfSSatish Balay 3621cfd2cc8SBarry Smith /* Use the rank information from the Cholesky decomposition to update par. */ 363a7e14dcfSSatish Balay 364a7e14dcfSSatish Balay if (indef > 1) { 365a7e14dcfSSatish Balay /* Restore column indef to A + par*I. */ 366a7e14dcfSSatish Balay iblas = indef - 1; 367792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[indef - 1 + 0 * lda], &blaslda, &a[0 + (indef - 1) * lda], &blas1)); 368a7e14dcfSSatish Balay a[indef - 1 + (indef - 1) * lda] = wa1[indef - 1] + par; 369a7e14dcfSSatish Balay 370a7e14dcfSSatish Balay /* compute parc. */ 371792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[0 + (indef - 1) * lda], &blas1, wa2, &blas1)); 372792fecdfSBarry Smith PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &iblas, &blas1, a, &blaslda, wa2, &blasn, &blasinfo)); 37363a3b9bcSJacob Faibussowitsch PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo); 374792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, wa2, &blas1, &a[0 + (indef - 1) * lda], &blas1)); 3759371c9d4SSatish Balay PetscCallBLAS("BLASnrm2", temp = BLASnrm2_(&iblas, &a[0 + (indef - 1) * lda], &blas1)); 3769371c9d4SSatish Balay CHKMEMQ; 377a7e14dcfSSatish Balay a[indef - 1 + (indef - 1) * lda] -= temp * temp; 378792fecdfSBarry Smith PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "N", "N", &iblas, &blas1, a, &blaslda, wa2, &blasn, &blasinfo)); 37963a3b9bcSJacob Faibussowitsch PetscCheck(!blasinfo, PETSC_COMM_SELF, PETSC_ERR_LIB, "LAPACKtrtrs() returned info %" PetscBLASInt_FMT, blasinfo); 380a7e14dcfSSatish Balay } 381a7e14dcfSSatish Balay 382a7e14dcfSSatish Balay wa2[indef - 1] = -1.0; 383a7e14dcfSSatish Balay iblas = indef; 384792fecdfSBarry Smith PetscCallBLAS("BLASnrm2", temp = BLASnrm2_(&iblas, wa2, &blas1)); 385a7e14dcfSSatish Balay parc = -a[indef - 1 + (indef - 1) * lda] / (temp * temp); 386a7e14dcfSSatish Balay pars = PetscMax(pars, par + parc); 387a7e14dcfSSatish Balay 388a7e14dcfSSatish Balay /* If necessary, increase paru slightly. 389a7e14dcfSSatish Balay This is needed because in some exceptional situations 390a7e14dcfSSatish Balay paru is the optimal value of par. */ 391a7e14dcfSSatish Balay 392a7e14dcfSSatish Balay paru = PetscMax(paru, (1.0 + rtol) * pars); 393a7e14dcfSSatish Balay } 394a7e14dcfSSatish Balay 395a7e14dcfSSatish Balay /* Use pars to update parl */ 396a7e14dcfSSatish Balay parl = PetscMax(parl, pars); 397a7e14dcfSSatish Balay 398e4cb33bbSBarry Smith /* Test for converged. */ 399a7e14dcfSSatish Balay if (info == 0) { 400a7e14dcfSSatish Balay if (iter == itmax) info = 4; 401a7e14dcfSSatish Balay if (paru <= (1.0 + p5 * rtol) * pars) info = 3; 402a7e14dcfSSatish Balay if (paru == 0.0) info = 2; 403a7e14dcfSSatish Balay } 404a7e14dcfSSatish Balay 405a7e14dcfSSatish Balay /* If exiting, store the best approximation and restore 406a7e14dcfSSatish Balay the upper triangle of A. */ 407a7e14dcfSSatish Balay 408a7e14dcfSSatish Balay if (info != 0) { 409a7e14dcfSSatish Balay /* Compute the best current estimates for x and f. */ 410a7e14dcfSSatish Balay par = parf; 411a7e14dcfSSatish Balay f = -p5 * (rxnorm * rxnorm + par * xnorm * xnorm); 412a7e14dcfSSatish Balay if (rednc) { 413a7e14dcfSSatish Balay f = -p5 * (rxnorm * rxnorm + par * delta * delta - rznorm * rznorm); 414792fecdfSBarry Smith PetscCallBLAS("BLASaxpy", BLASaxpy_(&blasn, &alpha, z, &blas1, x, &blas1)); 415a7e14dcfSSatish Balay } 416a7e14dcfSSatish Balay /* Restore the upper triangle of A */ 417a7e14dcfSSatish Balay for (j = 0; j < n; j++) { 4189566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(n - j - 1, &iblas)); 419792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&iblas, &a[j + 1 + j * lda], &blas1, &a[j + (j + 1) * lda], &blaslda)); 420a7e14dcfSSatish Balay } 4219566063dSJacob Faibussowitsch PetscCall(PetscBLASIntCast(lda + 1, &iblas)); 422792fecdfSBarry Smith PetscCallBLAS("BLAScopy", BLAScopy_(&blasn, wa1, &blas1, a, &iblas)); 423a7e14dcfSSatish Balay break; 424a7e14dcfSSatish Balay } 425a7e14dcfSSatish Balay par = PetscMax(parl, par + parc); 426a7e14dcfSSatish Balay } 427a7e14dcfSSatish Balay *retpar = par; 428a7e14dcfSSatish Balay *retf = f; 429a7e14dcfSSatish Balay *retinfo = info; 430a7e14dcfSSatish Balay *retits = iter; 431a7e14dcfSSatish Balay CHKMEMQ; 432*3ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 433a7e14dcfSSatish Balay } 434