1*38774f0aSPeter Brune #include <../src/snes/impls/ngmres/snesngmres.h> /*I "petscsnes.h" I*/ 2*38774f0aSPeter Brune #include <petscblaslapack.h> 3*38774f0aSPeter Brune 4*38774f0aSPeter Brune #undef __FUNCT__ 5*38774f0aSPeter Brune #define __FUNCT__ "SNESNGMRESUpdateSubspace_Private" 6*38774f0aSPeter Brune PetscErrorCode SNESNGMRESUpdateSubspace_Private(SNES snes,PetscInt ivec,PetscInt l,Vec F,PetscReal fnorm,Vec X) 7*38774f0aSPeter Brune { 8*38774f0aSPeter Brune SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 9*38774f0aSPeter Brune Vec *Fdot = ngmres->Fdot; 10*38774f0aSPeter Brune Vec *Xdot = ngmres->Xdot; 11*38774f0aSPeter Brune PetscScalar *xi = ngmres->xi; 12*38774f0aSPeter Brune PetscInt i; 13*38774f0aSPeter Brune PetscReal nu; 14*38774f0aSPeter Brune PetscErrorCode ierr; 15*38774f0aSPeter Brune 16*38774f0aSPeter Brune PetscFunctionBegin; 17*38774f0aSPeter Brune if (ivec > l) SETERRQ2(((PetscObject)snes)->comm,PETSC_ERR_ARG_WRONGSTATE,"Cannot update vector %d with space size %d!",ivec,l); 18*38774f0aSPeter Brune ierr = VecCopy(F,Fdot[ivec]);CHKERRQ(ierr); 19*38774f0aSPeter Brune ierr = VecCopy(X,Xdot[ivec]);CHKERRQ(ierr); 20*38774f0aSPeter Brune ngmres->fnorms[ivec] = fnorm; 21*38774f0aSPeter Brune if (l > 0) { 22*38774f0aSPeter Brune ierr = VecMDot(F,l,Fdot,xi);CHKERRQ(ierr); 23*38774f0aSPeter Brune for (i = 0;i < l;i++) { 24*38774f0aSPeter Brune Q(i,ivec) = xi[i]; 25*38774f0aSPeter Brune Q(ivec,i) = xi[i]; 26*38774f0aSPeter Brune } 27*38774f0aSPeter Brune } else { 28*38774f0aSPeter Brune nu = fnorm*fnorm; 29*38774f0aSPeter Brune Q(0,0) = nu; 30*38774f0aSPeter Brune } 31*38774f0aSPeter Brune PetscFunctionReturn(0); 32*38774f0aSPeter Brune } 33*38774f0aSPeter Brune 34*38774f0aSPeter Brune #undef __FUNCT__ 35*38774f0aSPeter Brune #define __FUNCT__ "SNESNGMRESFormCombinedSolution_Private" 36*38774f0aSPeter Brune PetscErrorCode SNESNGMRESFormCombinedSolution_Private(SNES snes,PetscInt l,Vec XM,Vec FM,PetscReal fMnorm,Vec X,Vec XA,Vec FA) 37*38774f0aSPeter Brune { 38*38774f0aSPeter Brune SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 39*38774f0aSPeter Brune PetscInt i,j; 40*38774f0aSPeter Brune Vec *Fdot = ngmres->Fdot; 41*38774f0aSPeter Brune Vec *Xdot = ngmres->Xdot; 42*38774f0aSPeter Brune PetscScalar *beta = ngmres->beta; 43*38774f0aSPeter Brune PetscScalar *xi = ngmres->xi; 44*38774f0aSPeter Brune PetscScalar alph_total = 0.; 45*38774f0aSPeter Brune PetscErrorCode ierr; 46*38774f0aSPeter Brune PetscReal nu; 47*38774f0aSPeter Brune Vec Y = snes->work[2]; 48*38774f0aSPeter Brune PetscBool changed_y,changed_w; 49*38774f0aSPeter Brune 50*38774f0aSPeter Brune PetscFunctionBegin; 51*38774f0aSPeter Brune nu = fMnorm*fMnorm; 52*38774f0aSPeter Brune 53*38774f0aSPeter Brune /* construct the right hand side and xi factors */ 54*38774f0aSPeter Brune ierr = VecMDot(FM,l,Fdot,xi);CHKERRQ(ierr); 55*38774f0aSPeter Brune for (i = 0; i < l; i++) { 56*38774f0aSPeter Brune beta[i] = nu - xi[i]; 57*38774f0aSPeter Brune } 58*38774f0aSPeter Brune /* construct h */ 59*38774f0aSPeter Brune for (j = 0; j < l; j++) { 60*38774f0aSPeter Brune for (i = 0; i < l; i++) { 61*38774f0aSPeter Brune H(i,j) = Q(i,j)-xi[i]-xi[j]+nu; 62*38774f0aSPeter Brune } 63*38774f0aSPeter Brune } 64*38774f0aSPeter Brune if (l == 1) { 65*38774f0aSPeter Brune /* simply set alpha[0] = beta[0] / H[0, 0] */ 66*38774f0aSPeter Brune if (H(0,0) != 0.) { 67*38774f0aSPeter Brune beta[0] = beta[0]/H(0,0); 68*38774f0aSPeter Brune } else { 69*38774f0aSPeter Brune beta[0] = 0.; 70*38774f0aSPeter Brune } 71*38774f0aSPeter Brune } else { 72*38774f0aSPeter Brune #if defined(PETSC_MISSING_LAPACK_GELSS) 73*38774f0aSPeter Brune SETERRQ(((PetscObject)snes)->comm,PETSC_ERR_SUP,"NGMRES with LS requires the LAPACK GELSS routine."); 74*38774f0aSPeter Brune #else 75*38774f0aSPeter Brune ierr = PetscBLASIntCast(l,&ngmres->m);CHKERRQ(ierr); 76*38774f0aSPeter Brune ierr = PetscBLASIntCast(l,&ngmres->n);CHKERRQ(ierr); 77*38774f0aSPeter Brune ngmres->info = 0; 78*38774f0aSPeter Brune ngmres->rcond = -1.; 79*38774f0aSPeter Brune ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 80*38774f0aSPeter Brune #if defined(PETSC_USE_COMPLEX) 81*38774f0aSPeter Brune LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,ngmres->rwork,&ngmres->info); 82*38774f0aSPeter Brune #else 83*38774f0aSPeter Brune LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,&ngmres->info); 84*38774f0aSPeter Brune #endif 85*38774f0aSPeter Brune ierr = PetscFPTrapPop();CHKERRQ(ierr); 86*38774f0aSPeter Brune if (ngmres->info < 0) SETERRQ(((PetscObject)snes)->comm,PETSC_ERR_LIB,"Bad argument to GELSS"); 87*38774f0aSPeter Brune if (ngmres->info > 0) SETERRQ(((PetscObject)snes)->comm,PETSC_ERR_LIB,"SVD failed to converge"); 88*38774f0aSPeter Brune #endif 89*38774f0aSPeter Brune } 90*38774f0aSPeter Brune for (i=0;i<l;i++) { 91*38774f0aSPeter Brune if (PetscIsInfOrNanScalar(beta[i]))SETERRQ(((PetscObject)snes)->comm,PETSC_ERR_LIB,"SVD generated inconsistent output"); 92*38774f0aSPeter Brune } 93*38774f0aSPeter Brune alph_total = 0.; 94*38774f0aSPeter Brune for (i = 0; i < l; i++) { 95*38774f0aSPeter Brune alph_total += beta[i]; 96*38774f0aSPeter Brune } 97*38774f0aSPeter Brune ierr = VecCopy(XM,XA);CHKERRQ(ierr); 98*38774f0aSPeter Brune ierr = VecScale(XA,1.-alph_total);CHKERRQ(ierr); 99*38774f0aSPeter Brune ierr = VecMAXPY(XA,l,beta,Xdot);CHKERRQ(ierr); 100*38774f0aSPeter Brune /* check the validity of the step */ 101*38774f0aSPeter Brune ierr = VecCopy(XA,Y);CHKERRQ(ierr); 102*38774f0aSPeter Brune ierr = VecAXPY(Y,-1.0,X);CHKERRQ(ierr); 103*38774f0aSPeter Brune ierr = SNESLineSearchPostCheck(snes->linesearch,X,Y,XA,&changed_y,&changed_w);CHKERRQ(ierr); 104*38774f0aSPeter Brune ierr = SNESComputeFunction(snes,XA,FA);CHKERRQ(ierr); 105*38774f0aSPeter Brune PetscFunctionReturn(0); 106*38774f0aSPeter Brune } 107*38774f0aSPeter Brune 108*38774f0aSPeter Brune #undef __FUNCT__ 109*38774f0aSPeter Brune #define __FUNCT__ "SNESNGMRESCalculateDifferences_Private" 110*38774f0aSPeter Brune PetscErrorCode SNESNGMRESCalculateDifferences_Private(SNES snes,PetscInt l,Vec X,Vec F,Vec XM,Vec FM,Vec XA,Vec FA,Vec D,PetscReal *dnorm,PetscReal *dminnorm,PetscReal *fAnorm) 111*38774f0aSPeter Brune { 112*38774f0aSPeter Brune PetscErrorCode ierr; 113*38774f0aSPeter Brune SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 114*38774f0aSPeter Brune PetscReal dcurnorm; 115*38774f0aSPeter Brune Vec *Xdot = ngmres->Xdot; 116*38774f0aSPeter Brune PetscInt i; 117*38774f0aSPeter Brune 118*38774f0aSPeter Brune PetscFunctionBegin; 119*38774f0aSPeter Brune if (ngmres->singlereduction) { 120*38774f0aSPeter Brune *dminnorm = -1.0; 121*38774f0aSPeter Brune if (fAnorm) { 122*38774f0aSPeter Brune ierr = VecNormBegin(FA,NORM_2,fAnorm);CHKERRQ(ierr); 123*38774f0aSPeter Brune } 124*38774f0aSPeter Brune if (dnorm) { 125*38774f0aSPeter Brune ierr=VecCopy(XA,D);CHKERRQ(ierr); 126*38774f0aSPeter Brune ierr=VecAXPY(D,-1.0,XM);CHKERRQ(ierr); 127*38774f0aSPeter Brune ierr = VecNormBegin(D,NORM_2,dnorm);CHKERRQ(ierr); 128*38774f0aSPeter Brune } 129*38774f0aSPeter Brune if (dminnorm) { 130*38774f0aSPeter Brune for (i=0;i<l;i++) { 131*38774f0aSPeter Brune ierr=VecAXPY(Xdot[i],-1.0,XA);CHKERRQ(ierr); 132*38774f0aSPeter Brune } 133*38774f0aSPeter Brune for (i=0;i<l;i++) { 134*38774f0aSPeter Brune ierr = VecNormBegin(Xdot[i],NORM_2,&ngmres->xnorms[i]);CHKERRQ(ierr); 135*38774f0aSPeter Brune } 136*38774f0aSPeter Brune } 137*38774f0aSPeter Brune if (fAnorm) {ierr = VecNormEnd(FA,NORM_2,fAnorm);CHKERRQ(ierr);} 138*38774f0aSPeter Brune if (dnorm) {ierr = VecNormEnd(D,NORM_2,dnorm);CHKERRQ(ierr);} 139*38774f0aSPeter Brune if (dminnorm) { 140*38774f0aSPeter Brune for (i=0;i<l;i++) { 141*38774f0aSPeter Brune ierr = VecNormEnd(Xdot[i],NORM_2,&ngmres->xnorms[i]);CHKERRQ(ierr); 142*38774f0aSPeter Brune } 143*38774f0aSPeter Brune for (i=0;i<l;i++) { 144*38774f0aSPeter Brune dcurnorm = ngmres->xnorms[i]; 145*38774f0aSPeter Brune if ((dcurnorm < *dminnorm) || (*dminnorm < 0.0)) *dminnorm = dcurnorm; 146*38774f0aSPeter Brune ierr=VecAXPY(Xdot[i],1.0,XA);CHKERRQ(ierr); 147*38774f0aSPeter Brune } 148*38774f0aSPeter Brune } 149*38774f0aSPeter Brune } else { 150*38774f0aSPeter Brune if (dnorm) { 151*38774f0aSPeter Brune ierr=VecCopy(XA,D);CHKERRQ(ierr); 152*38774f0aSPeter Brune ierr=VecAXPY(D,-1.0,XM);CHKERRQ(ierr); 153*38774f0aSPeter Brune ierr=VecNormBegin(D,NORM_2,dnorm);CHKERRQ(ierr); 154*38774f0aSPeter Brune } 155*38774f0aSPeter Brune if (fAnorm) { 156*38774f0aSPeter Brune ierr=VecNormBegin(FA,NORM_2,fAnorm);CHKERRQ(ierr); 157*38774f0aSPeter Brune } 158*38774f0aSPeter Brune if (dnorm) { 159*38774f0aSPeter Brune ierr=VecNormEnd(D,NORM_2,dnorm);CHKERRQ(ierr); 160*38774f0aSPeter Brune } 161*38774f0aSPeter Brune if (fAnorm) { 162*38774f0aSPeter Brune ierr=VecNormEnd(FA,NORM_2,fAnorm);CHKERRQ(ierr); 163*38774f0aSPeter Brune } 164*38774f0aSPeter Brune if (dminnorm) { 165*38774f0aSPeter Brune *dminnorm = -1.0; 166*38774f0aSPeter Brune for (i=0;i<l;i++) { 167*38774f0aSPeter Brune ierr=VecCopy(XA,D);CHKERRQ(ierr); 168*38774f0aSPeter Brune ierr=VecAXPY(D,-1.0,Xdot[i]);CHKERRQ(ierr); 169*38774f0aSPeter Brune ierr=VecNorm(D,NORM_2,&dcurnorm);CHKERRQ(ierr); 170*38774f0aSPeter Brune if ((dcurnorm < *dminnorm) || (*dminnorm < 0.0)) *dminnorm = dcurnorm; 171*38774f0aSPeter Brune } 172*38774f0aSPeter Brune } 173*38774f0aSPeter Brune } 174*38774f0aSPeter Brune PetscFunctionReturn(0); 175*38774f0aSPeter Brune } 176*38774f0aSPeter Brune 177*38774f0aSPeter Brune #undef __FUNCT__ 178*38774f0aSPeter Brune #define __FUNCT__ "SNESNGMRESSelect_Private" 179*38774f0aSPeter Brune PetscErrorCode SNESNGMRESSelect_Private(SNES snes,PetscInt k_restart,Vec XM,Vec FM,PetscReal fMnorm,Vec XA,Vec FA,PetscReal fAnorm,PetscReal dnorm,PetscReal fminnorm,PetscReal dminnorm,Vec X,Vec F,Vec Y,PetscReal *fnorm) 180*38774f0aSPeter Brune { 181*38774f0aSPeter Brune SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 182*38774f0aSPeter Brune PetscErrorCode ierr; 183*38774f0aSPeter Brune PetscBool lssucceed,selectA; 184*38774f0aSPeter Brune 185*38774f0aSPeter Brune PetscFunctionBegin; 186*38774f0aSPeter Brune if (ngmres->select_type == SNES_NGMRES_SELECT_LINESEARCH) { 187*38774f0aSPeter Brune /* X = X + \lambda(XA - X) */ 188*38774f0aSPeter Brune if (ngmres->monitor) { 189*38774f0aSPeter Brune ierr = PetscViewerASCIIPrintf(ngmres->monitor,"||F_A||_2 = %e, ||F_M||_2 = %e\n",fAnorm,fMnorm);CHKERRQ(ierr); 190*38774f0aSPeter Brune } 191*38774f0aSPeter Brune ierr = VecCopy(FM,F);CHKERRQ(ierr); 192*38774f0aSPeter Brune ierr = VecCopy(XM,X);CHKERRQ(ierr); 193*38774f0aSPeter Brune ierr = VecCopy(XA,Y);CHKERRQ(ierr); 194*38774f0aSPeter Brune ierr = VecAYPX(Y,-1.0,X);CHKERRQ(ierr); 195*38774f0aSPeter Brune *fnorm = fMnorm; 196*38774f0aSPeter Brune ierr = SNESLineSearchApply(ngmres->additive_linesearch,X,F,fnorm,Y);CHKERRQ(ierr); 197*38774f0aSPeter Brune ierr = SNESLineSearchGetSuccess(ngmres->additive_linesearch,&lssucceed);CHKERRQ(ierr); 198*38774f0aSPeter Brune if (!lssucceed) { 199*38774f0aSPeter Brune if (++snes->numFailures >= snes->maxFailures) { 200*38774f0aSPeter Brune snes->reason = SNES_DIVERGED_LINE_SEARCH; 201*38774f0aSPeter Brune PetscFunctionReturn(0); 202*38774f0aSPeter Brune } 203*38774f0aSPeter Brune } 204*38774f0aSPeter Brune if (ngmres->monitor) { 205*38774f0aSPeter Brune ierr = PetscViewerASCIIPrintf(ngmres->monitor,"Additive solution: ||F||_2 = %e\n",*fnorm);CHKERRQ(ierr); 206*38774f0aSPeter Brune } 207*38774f0aSPeter Brune } else if (ngmres->select_type == SNES_NGMRES_SELECT_DIFFERENCE) { 208*38774f0aSPeter Brune selectA = PETSC_TRUE; 209*38774f0aSPeter Brune /* Conditions for choosing the accelerated answer */ 210*38774f0aSPeter Brune /* Criterion A -- the norm of the function isn't increased above the minimum by too much */ 211*38774f0aSPeter Brune if (fAnorm >= ngmres->gammaA*fminnorm) { 212*38774f0aSPeter Brune selectA = PETSC_FALSE; 213*38774f0aSPeter Brune } 214*38774f0aSPeter Brune /* Criterion B -- the choice of x^A isn't too close to some other choice */ 215*38774f0aSPeter Brune if (ngmres->epsilonB*dnorm<dminnorm || PetscSqrtReal(*fnorm)<ngmres->deltaB*PetscSqrtReal(fminnorm)) { 216*38774f0aSPeter Brune } else { 217*38774f0aSPeter Brune selectA=PETSC_FALSE; 218*38774f0aSPeter Brune } 219*38774f0aSPeter Brune if (selectA) { 220*38774f0aSPeter Brune if (ngmres->monitor) { 221*38774f0aSPeter Brune ierr = PetscViewerASCIIPrintf(ngmres->monitor,"picked X_A, ||F_A||_2 = %e, ||F_M||_2 = %e\n",fAnorm,fMnorm);CHKERRQ(ierr); 222*38774f0aSPeter Brune } 223*38774f0aSPeter Brune /* copy it over */ 224*38774f0aSPeter Brune *fnorm = fAnorm; 225*38774f0aSPeter Brune ierr = VecCopy(FA,F);CHKERRQ(ierr); 226*38774f0aSPeter Brune ierr = VecCopy(XA,X);CHKERRQ(ierr); 227*38774f0aSPeter Brune } else { 228*38774f0aSPeter Brune if (ngmres->monitor) { 229*38774f0aSPeter Brune ierr = PetscViewerASCIIPrintf(ngmres->monitor,"picked X_M, ||F_A||_2 = %e, ||F_M||_2 = %e\n",fAnorm,fMnorm);CHKERRQ(ierr); 230*38774f0aSPeter Brune } 231*38774f0aSPeter Brune *fnorm = fMnorm; 232*38774f0aSPeter Brune ierr = VecCopy(XM,Y);CHKERRQ(ierr); 233*38774f0aSPeter Brune ierr = VecAXPY(Y,-1.0,X);CHKERRQ(ierr); 234*38774f0aSPeter Brune ierr = VecCopy(FM,F);CHKERRQ(ierr); 235*38774f0aSPeter Brune ierr = VecCopy(XM,X);CHKERRQ(ierr); 236*38774f0aSPeter Brune } 237*38774f0aSPeter Brune } else { /* none */ 238*38774f0aSPeter Brune *fnorm = fAnorm; 239*38774f0aSPeter Brune ierr = VecCopy(FA,F);CHKERRQ(ierr); 240*38774f0aSPeter Brune ierr = VecCopy(XA,X);CHKERRQ(ierr); 241*38774f0aSPeter Brune } 242*38774f0aSPeter Brune PetscFunctionReturn(0); 243*38774f0aSPeter Brune } 244*38774f0aSPeter Brune 245*38774f0aSPeter Brune #undef __FUNCT__ 246*38774f0aSPeter Brune #define __FUNCT__ "SNESNGMRESSelectRestart_Private" 247*38774f0aSPeter Brune PetscErrorCode SNESNGMRESSelectRestart_Private(SNES snes,PetscReal fAnorm,PetscReal dnorm,PetscReal fminnorm,PetscReal dminnorm,PetscBool *selectRestart) 248*38774f0aSPeter Brune { 249*38774f0aSPeter Brune SNES_NGMRES *ngmres = (SNES_NGMRES*)snes->data; 250*38774f0aSPeter Brune PetscErrorCode ierr; 251*38774f0aSPeter Brune 252*38774f0aSPeter Brune PetscFunctionBegin; 253*38774f0aSPeter Brune *selectRestart = PETSC_FALSE; 254*38774f0aSPeter Brune /* difference stagnation restart */ 255*38774f0aSPeter Brune if ((ngmres->epsilonB*dnorm > dminnorm) && (PetscSqrtReal(fAnorm) > ngmres->deltaB*PetscSqrtReal(fminnorm))) { 256*38774f0aSPeter Brune if (ngmres->monitor) { 257*38774f0aSPeter Brune ierr = PetscViewerASCIIPrintf(ngmres->monitor,"difference restart: %e > %e\n",ngmres->epsilonB*dnorm,dminnorm);CHKERRQ(ierr); 258*38774f0aSPeter Brune } 259*38774f0aSPeter Brune *selectRestart = PETSC_TRUE; 260*38774f0aSPeter Brune } 261*38774f0aSPeter Brune /* residual stagnation restart */ 262*38774f0aSPeter Brune if (PetscSqrtReal(fAnorm) > ngmres->gammaC*PetscSqrtReal(fminnorm)) { 263*38774f0aSPeter Brune if (ngmres->monitor) { 264*38774f0aSPeter Brune ierr = PetscViewerASCIIPrintf(ngmres->monitor,"residual restart: %e > %e\n",PetscSqrtReal(fAnorm),ngmres->gammaC*PetscSqrtReal(fminnorm));CHKERRQ(ierr); 265*38774f0aSPeter Brune } 266*38774f0aSPeter Brune *selectRestart = PETSC_TRUE; 267*38774f0aSPeter Brune } 268*38774f0aSPeter Brune PetscFunctionReturn(0); 269*38774f0aSPeter Brune } 270