1 /* Defines the basic SNES object */ 2 #include <private/snesimpl.h> 3 4 static PetscErrorCode BuildNGmresSoln(PetscScalar*,Vec,SNES,PetscInt,PetscInt); 5 6 /* Private structure for the Anderson mixing method aka nonlinear Krylov */ 7 typedef struct { 8 PetscScalar *hh_origin; 9 Vec *v, *w, *q; 10 PetscReal *f2; /* 2-norms of function (residual) at each stage */ 11 PetscInt msize; /* maximum size of space */ 12 PetscInt csize; /* current size of space */ 13 PetscScalar beta; /* relaxation parameter */ 14 PetscScalar *nrs; /* temp that holds the coefficients of the Krylov vectors that form the minimum residual solution */ 15 16 } SNES_NGMRES; 17 18 #define HH(a,b) (ngmres->hh_origin + (a)*(ngmres->msize)+(b)) 19 20 #undef __FUNCT__ 21 #define __FUNCT__ "SNESReset_NGMRES" 22 PetscErrorCode SNESReset_NGMRES(SNES snes) 23 { 24 SNES_NGMRES *ngmres = (SNES_NGMRES*) snes->data; 25 PetscErrorCode ierr; 26 27 PetscFunctionBegin; 28 ierr = VecDestroyVecs(ngmres->msize, &ngmres->v);CHKERRQ(ierr); 29 ierr = VecDestroyVecs(ngmres->msize, &ngmres->w);CHKERRQ(ierr); 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "SNESDestroy_NGMRES" 35 PetscErrorCode SNESDestroy_NGMRES(SNES snes) 36 { 37 PetscErrorCode ierr; 38 39 PetscFunctionBegin; 40 ierr = SNESReset_NGMRES(snes);CHKERRQ(ierr); 41 if (snes->work) {ierr = VecDestroyVecs(snes->nwork, &snes->work);CHKERRQ(ierr);} 42 PetscFunctionReturn(0); 43 } 44 45 #undef __FUNCT__ 46 #define __FUNCT__ "SNESSetUp_NGMRES" 47 PetscErrorCode SNESSetUp_NGMRES(SNES snes) 48 { 49 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 50 PetscInt msize,hh; 51 PetscErrorCode ierr; 52 53 PetscFunctionBegin; 54 #if 0 55 if (snes->pc_side != PC_LEFT) {SETERRQ(((PetscObject) snes)->comm, PETSC_ERR_SUP, "Only left preconditioning allowed for SNESNGMRES");} 56 #endif 57 ngmres->beta = 1.0; 58 msize = ngmres->msize; /* restart size */ 59 hh = msize * msize; 60 ierr = PetscMalloc2(hh,PetscScalar,&ngmres->hh_origin,msize,PetscScalar,&ngmres->nrs);CHKERRQ(ierr); 61 62 ierr = PetscMemzero(ngmres->hh_origin,hh*sizeof(PetscScalar));CHKERRQ(ierr); 63 ierr = PetscMemzero(ngmres->nrs,msize*sizeof(PetscScalar));CHKERRQ(ierr); 64 65 // ierr = PetscLogObjectMemory(ksp,(hh+msize)*sizeof(PetscScalar));CHKERRQ(ierr); 66 67 ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->v);CHKERRQ(ierr); 68 ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->w);CHKERRQ(ierr); 69 ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->q);CHKERRQ(ierr); 70 ierr = SNESDefaultGetWork(snes, 2);CHKERRQ(ierr); 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNCT__ 75 #define __FUNCT__ "SNESSetFromOptions_NGMRES" 76 PetscErrorCode SNESSetFromOptions_NGMRES(SNES snes) 77 { 78 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 79 PetscErrorCode ierr; 80 81 PetscFunctionBegin; 82 ierr = PetscOptionsHead("SNES NGMRES options");CHKERRQ(ierr); 83 ierr = PetscOptionsInt("-snes_gmres_restart", "Number of directions", "SNES", ngmres->msize, &ngmres->msize, PETSC_NULL);CHKERRQ(ierr); 84 ierr = PetscOptionsTail();CHKERRQ(ierr); 85 PetscFunctionReturn(0); 86 } 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "SNESView_NGMRES" 90 PetscErrorCode SNESView_NGMRES(SNES snes, PetscViewer viewer) 91 { 92 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 93 PetscBool iascii; 94 PetscErrorCode ierr; 95 96 PetscFunctionBegin; 97 ierr = PetscTypeCompare((PetscObject) viewer, PETSCVIEWERASCII, &iascii);CHKERRQ(ierr); 98 if (iascii) { 99 ierr = PetscViewerASCIIPrintf(viewer, " Size of space %d\n", ngmres->msize);CHKERRQ(ierr); 100 } 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "SNESSolve_NGMRES" 106 PetscErrorCode SNESSolve_NGMRES(SNES snes) 107 { 108 SNES pc; 109 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 110 Vec X,F, Fold, Xold,temp,*dX = ngmres->w,*dF = ngmres->v; 111 PetscScalar *nrs=ngmres->nrs; 112 PetscReal gnorm; 113 PetscInt i, j, k, l, flag, it; 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 snes->reason = SNES_CONVERGED_ITERATING; 118 X = snes->vec_sol; 119 F = snes->vec_func; 120 Fold = snes->work[0]; 121 Xold = snes->work[1]; 122 ierr = VecDuplicate(X,&Xold);CHKERRQ(ierr); 123 ierr = VecDuplicate(X,&temp);CHKERRQ(ierr); 124 125 ierr = SNESGetPC(snes, &pc);CHKERRQ(ierr); 126 ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); 127 snes->iter = 0; 128 snes->norm = 0.; 129 ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); 130 ierr = SNESComputeFunction(snes, X, temp);CHKERRQ(ierr); /* r = F(x) */ 131 #if 0 132 ierr = SNESSolve(snes->pc, temp, F);CHKERRQ(ierr); /* p = P(r) */ 133 #else 134 ierr = VecCopy(temp, F);CHKERRQ(ierr); /* p = r */ 135 #endif 136 137 if (snes->domainerror) { 138 snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; 139 PetscFunctionReturn(0); 140 } 141 ierr = VecNorm(F, NORM_2, &gnorm);CHKERRQ(ierr); /* fnorm = ||r|| */ 142 if (PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_FP, "Infinite or not-a-number generated in norm"); 143 144 ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); 145 snes->norm = gnorm; 146 ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); 147 SNESLogConvHistory(snes, gnorm, 0); 148 ierr = SNESMonitor(snes, 0, gnorm);CHKERRQ(ierr); 149 150 /* set parameter for default relative tolerance convergence test */ 151 snes->ttol = gnorm*snes->rtol; 152 /* test convergence */ 153 ierr = (*snes->ops->converged)(snes,0,0.0,0.0,gnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); 154 if (snes->reason) PetscFunctionReturn(0); 155 156 /* for k=0 */ 157 158 k=0; 159 ierr= VecCopy(X,Xold); CHKERRQ(ierr); /* Xbar_0= X_0 */ 160 ierr= VecCopy(F,Fold); CHKERRQ(ierr); /* Fbar_0 = f_0= B(b-Ax_0) */ 161 ierr= VecWAXPY(X, ngmres->beta, Fold, Xold); CHKERRQ(ierr); /*X_1 = X_bar + beta * Fbar */ 162 163 /* to calculate f_1 */ 164 ierr = SNESComputeFunction(snes, X, temp);CHKERRQ(ierr); /* r = F(x) */ 165 #if 0 166 ierr = SNESSolve(snes->pc, temp, F);CHKERRQ(ierr); /* p = P(r) */ 167 #else 168 ierr = VecCopy(temp, F);CHKERRQ(ierr); /* p = r */ 169 #endif 170 171 172 /* calculate dX and dF for k=0 */ 173 ierr= VecWAXPY(dX[k],-1.0, Xold, X); CHKERRQ(ierr); /* dX= X_1 - X_0 */ 174 ierr= VecWAXPY(dF[k],-1.0, Fold, F); CHKERRQ(ierr); /* dF= f_1 - f_0 */ 175 176 177 ierr= VecCopy(X,Xold); CHKERRQ(ierr); /* Xbar_0= X_0 */ 178 ierr= VecCopy(F,Fold); CHKERRQ(ierr); /* Fbar_0 = f_0= B(b-Ax_0) */ 179 180 flag=0; 181 182 183 184 for (k=1; k<snes->max_its; k += 1) { /* begin the iteration */ 185 l=ngmres->msize; 186 if(k<l) l=k; 187 it=l-1; 188 189 ierr = BuildNGmresSoln(nrs,Fold,snes,it,flag);CHKERRQ(ierr); 190 191 192 193 /* to obtain the solution at k+1 step */ 194 ierr= VecCopy(Xold,X); CHKERRQ(ierr); /* X=Xold+Fold-(dX + dF) *nrd */ 195 ierr= VecAXPY(X,1.0,Fold); CHKERRQ(ierr); /* X= Xold+Fold */ 196 for(i=0;i<l;i++){ /*X= Xold+Fold- (dX+dF*beta) *innerb */ 197 ierr= VecAXPY(X,-nrs[i], dX[i]);CHKERRQ(ierr); 198 ierr= VecAXPY(X,-nrs[i]*ngmres->beta, dF[i]);CHKERRQ(ierr); 199 } 200 201 202 /* to test with GMRES */ 203 //ierr= VecCopy(Xold,temp); CHKERRQ(ierr); /* X=Xold-(dX ) *nrd */ 204 /*for(i=0;i<l;i++){ 205 ierr= VecAXPY(temp,-nrs[i], dX[i]);CHKERRQ(ierr); 206 } 207 ierr = KSP_MatMult(ksp,Amat,temp,R);CHKERRQ(ierr); 208 ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr); 209 ierr = PCApply(ksp->pc,R,F);CHKERRQ(ierr); 210 ierr = VecNorm(R,NORM_2,&gnorm);CHKERRQ(ierr); 211 printf("gmres residual norm=%g\n",gnorm); 212 */ 213 214 /* to calculate f_k+1 */ 215 ierr = SNESComputeFunction(snes, X, temp);CHKERRQ(ierr); /* r = F(x) */ 216 #if 0 217 ierr = SNESSolve(snes->pc, temp, F);CHKERRQ(ierr); /* p = P(r) */ 218 #else 219 ierr = VecCopy(temp, F);CHKERRQ(ierr); /* p = r */ 220 #endif 221 222 223 224 /* check the convegence */ 225 ierr = VecNorm(F, NORM_2, &gnorm);CHKERRQ(ierr); /* fnorm = ||r|| */ 226 SNESLogConvHistory(snes, gnorm, k); 227 ierr = SNESMonitor(snes, k, gnorm);CHKERRQ(ierr); 228 snes->iter =k; 229 ierr = (*snes->ops->converged)(snes,0,0.0,0.0,gnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); 230 231 if (snes->reason) PetscFunctionReturn(0); 232 233 234 235 /* calculate dX and dF for k=0 */ 236 if( k>l) {/* we need to replace the old vectors */ 237 flag=1; 238 for(i=0;i<l-1;i++){ 239 ierr= VecCopy(dX[i+1],dX[i]); CHKERRQ(ierr); /* X=Xold+Fold-(dX + dF) *nrd */ 240 ierr= VecCopy(dF[i+1],dF[i]); CHKERRQ(ierr); /* X=Xold+Fold-(dX + dF) *nrd */ 241 for(j=0;j<l;j++) 242 *HH(j,i)=*HH(j,i+1); 243 } 244 } 245 ierr= VecWAXPY(dX[l],-1.0, Xold, X); CHKERRQ(ierr); /* dX= X_1 - X_0 */ 246 ierr= VecWAXPY(dF[l],-1.0, Fold, F); CHKERRQ(ierr); /* dF= f_1 - f_0 */ 247 ierr= VecCopy(X,Xold); CHKERRQ(ierr); 248 ierr= VecCopy(F,Fold); CHKERRQ(ierr); 249 250 } 251 snes->reason = SNES_DIVERGED_MAX_IT; 252 PetscFunctionReturn(0); 253 } 254 255 /*MC 256 SNESNGMRES - The Nonlinear Generalized Minimum Residual (NGMRES) method of Oosterlee and Washio. 257 258 Level: beginner 259 260 Notes: Supports only left preconditioning 261 262 "Krylov Subspace Acceleration of Nonlinear Multigrid with Application to Recirculating Flows", C. W. Oosterlee and T. Washio, 263 SIAM Journal on Scientific Computing, 21(5), 2000. 264 265 .seealso: SNESCreate(), SNES, SNESSetType(), SNESType (for list of available types) 266 M*/ 267 EXTERN_C_BEGIN 268 #undef __FUNCT__ 269 #define __FUNCT__ "SNESCreate_NGMRES" 270 PetscErrorCode SNESCreate_NGMRES(SNES snes) 271 { 272 SNES_NGMRES *ngmres; 273 PetscErrorCode ierr; 274 275 PetscFunctionBegin; 276 snes->ops->destroy = SNESDestroy_NGMRES; 277 snes->ops->setup = SNESSetUp_NGMRES; 278 snes->ops->setfromoptions = SNESSetFromOptions_NGMRES; 279 snes->ops->view = SNESView_NGMRES; 280 snes->ops->solve = SNESSolve_NGMRES; 281 snes->ops->reset = SNESReset_NGMRES; 282 283 ierr = PetscNewLog(snes, SNES_NGMRES, &ngmres);CHKERRQ(ierr); 284 snes->data = (void*) ngmres; 285 ngmres->msize = 30; 286 ngmres->csize = 0; 287 288 ierr = SNESGetPC(snes, &snes->pc);CHKERRQ(ierr); 289 #if 0 290 if (ksp->pc_side != PC_LEFT) {ierr = PetscInfo(ksp,"WARNING! Setting PC_SIDE for NGMRES to left!\n");CHKERRQ(ierr);} 291 snes->pc_side = PC_LEFT; 292 #endif 293 PetscFunctionReturn(0); 294 } 295 EXTERN_C_END 296 297 298 299 /* 300 BuildNGmresSoln - create the solution of the least square problem to determine the coefficients 301 302 Input parameters: 303 nrs - the solution 304 Fold - the RHS vector 305 flag - =1, we need to replace the old vector by new one, =0, we still add new vector 306 This is an internal routine that knows about the NGMRES internals. 307 */ 308 #undef __FUNCT__ 309 #define __FUNCT__ "BuildNGmresSoln" 310 static PetscErrorCode BuildNGmresSoln(PetscScalar* nrs, Vec Fold, SNES snes,PetscInt it, PetscInt flag) 311 { 312 PetscScalar tt,temps; 313 PetscErrorCode ierr; 314 PetscInt i,ii,j,l; 315 SNES_NGMRES *ngmres = (SNES_NGMRES *)(snes->data); 316 Vec *dF=ngmres->v, *Q=ngmres->q,temp; 317 PetscReal gam,areal; 318 PetscScalar a,b,c,s; 319 320 PetscFunctionBegin; 321 ierr = VecDuplicate(Fold,&temp);CHKERRQ(ierr); 322 l=it+1; 323 324 /* Solve for solution vector that minimizes the residual */ 325 326 if(flag==1) { // we need to replace the old vector and need to modify the QR factors, use Givens rotation 327 for(i=0;i<it;i++){ 328 /* calculate the Givens rotation */ 329 a=*HH(i,i); 330 b=*HH(i+1,i); 331 gam=1.0/PetscRealPart(PetscSqrtScalar(PetscConj(a)*a+PetscConj(b)*b)); 332 c= a*gam; 333 s= b*gam; 334 /* update the Q factor */ 335 ierr= VecCopy(Q[i],temp); CHKERRQ(ierr); 336 ierr = VecAXPBY(temp,s,c,Q[i+1]);CHKERRQ(ierr); /*temp= c*Q[i]+s*Q[i+1] */ 337 ierr = VecAXPBY(Q[i+1],-s,c,Q[i]);CHKERRQ(ierr); /* Q[i+1]= -s*Q[i] + c*Q[i+1] */ 338 ierr= VecCopy(temp,Q[i]); CHKERRQ(ierr); /* Q[i]= c*Q[i] + s*Q[i+1] */ 339 /* update the R factor */ 340 for(j=0;j<l;j++){ 341 a= *HH(i,j); 342 b=*HH(i+1,j); 343 temps=c* a+s* b; 344 *HH(i+1,j)=-s*a+c*b; 345 *HH(i,j)=temps; 346 } 347 } 348 } 349 350 // add a new vector, use modified Gram-Schmidt 351 ierr= VecCopy(dF[it],temp); CHKERRQ(ierr); 352 for(i=0;i<it;i++){ 353 ierr=VecDot(temp,Q[i],HH(i,it));CHKERRQ(ierr); /* h(i,l-1)= dF[l-1]'*Q[i] */ 354 ierr = VecAXPBY(temp,-*HH(i,it),1.0,Q[i]);CHKERRQ(ierr); /* temp= temp- h(i,l-1)*Q[i] */ 355 } 356 ierr=VecCopy(temp,Q[it]);CHKERRQ(ierr); 357 ierr=VecNormalize(Q[it],&areal);CHKERRQ(ierr); 358 *HH(it,it) = a = areal; 359 360 361 362 /* modify the RHS with Q'*Fold*/ 363 364 for(i=0;i<l;i++) 365 ierr=VecDot(Fold,Q[i],ngmres->nrs+i);CHKERRQ(ierr); /* nrs= Fold'*Q[i] */ 366 367 /* start backsubstitution to solve the least square problem */ 368 369 if (*HH(it,it) != 0.0) { 370 nrs[it] = nrs[it]/ *HH(it,it); 371 } else { 372 snes->reason = SNES_DIVERGED_MAX_IT; 373 ierr = PetscInfo2(snes,"Likely your matrix or preconditioner is singular. HH(it,it) is identically zero; it = %D nrs(it) = %G",it,10); 374 PetscFunctionReturn(0); 375 } 376 for (ii=1; ii<=it; ii++) { 377 i = it - ii; 378 tt = nrs[i]; 379 for (j=i+1; j<=it; j++) tt = tt - *HH(i,j) * nrs[j]; 380 if (*HH(i,i) == 0.0) { 381 snes->reason = SNES_DIVERGED_MAX_IT; 382 ierr = PetscInfo1(snes,"Likely your matrix or preconditioner is singular. HH(k,k) is identically zero; i = %D",i); 383 PetscFunctionReturn(0); 384 } 385 nrs[i] = tt / *HH(i,i); 386 } 387 388 389 PetscFunctionReturn(0); 390 } 391