1 /* Defines the basic SNES object */ 2 #include <../src/snes/impls/ngmres/snesngmres.h> 3 #include <petscblaslapack.h> 4 5 6 /*MC 7 SNESNGMRES - The Nonlinear Generalized Minimum Residual (NGMRES) method of Oosterlee and Washio. 8 9 Level: beginner 10 11 Notes: Supports only left preconditioning 12 13 "Krylov Subspace Acceleration of Nonlinear Multigrid with Application to Recirculating Flows", C. W. Oosterlee and T. Washio, 14 SIAM Journal on Scientific Computing, 21(5), 2000. 15 16 .seealso: SNESCreate(), SNES, SNESSetType(), SNESType (for list of available types) 17 M*/ 18 19 #undef __FUNCT__ 20 #define __FUNCT__ "SNESReset_NGMRES" 21 PetscErrorCode SNESReset_NGMRES(SNES snes) 22 { 23 SNES_NGMRES *ngmres = (SNES_NGMRES*) snes->data; 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 ierr = VecDestroyVecs(ngmres->msize, &ngmres->rdot);CHKERRQ(ierr); 28 ierr = VecDestroyVecs(ngmres->msize, &ngmres->xdot);CHKERRQ(ierr); 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "SNESDestroy_NGMRES" 34 PetscErrorCode SNESDestroy_NGMRES(SNES snes) 35 { 36 PetscErrorCode ierr; 37 38 PetscFunctionBegin; 39 ierr = SNESReset_NGMRES(snes);CHKERRQ(ierr); 40 if (snes->work) {ierr = VecDestroyVecs(snes->nwork, &snes->work);CHKERRQ(ierr);} 41 if (snes->data) { 42 SNES_NGMRES * ngmres = (SNES_NGMRES *)snes->data; 43 ierr = PetscFree4(ngmres->h, ngmres->beta, ngmres->xi, ngmres->q);CHKERRQ(ierr); 44 ierr = PetscFree(ngmres->s);CHKERRQ(ierr); 45 #if PETSC_USE_COMPLEX 46 ierr = PetscFree(ngmres->rwork); 47 #endif 48 ierr = PetscFree(ngmres->work); 49 } 50 ierr = PetscFree(snes->data); 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "SNESSetUp_NGMRES" 56 PetscErrorCode SNESSetUp_NGMRES(SNES snes) 57 { 58 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 59 PetscInt msize,hsize; 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 msize = ngmres->msize; /* restart size */ 64 hsize = msize * msize; 65 66 67 //explicit least squares minimization solve 68 ierr = PetscMalloc5(hsize,PetscScalar,&ngmres->h, 69 msize,PetscScalar,&ngmres->beta, 70 msize,PetscScalar,&ngmres->xi, 71 msize,PetscReal,&ngmres->r_norms, 72 hsize,PetscScalar,&ngmres->q);CHKERRQ(ierr); 73 ngmres->nrhs = 1; 74 ngmres->lda = msize; 75 ngmres->ldb = msize; 76 ierr = PetscMalloc(msize*sizeof(PetscScalar),&ngmres->s);CHKERRQ(ierr); 77 78 ierr = PetscMemzero(ngmres->h,hsize*sizeof(PetscScalar));CHKERRQ(ierr); 79 ierr = PetscMemzero(ngmres->q,hsize*sizeof(PetscScalar));CHKERRQ(ierr); 80 ierr = PetscMemzero(ngmres->xi,msize*sizeof(PetscScalar));CHKERRQ(ierr); 81 ierr = PetscMemzero(ngmres->beta,msize*sizeof(PetscScalar));CHKERRQ(ierr); 82 83 ngmres->lwork = 12*msize; 84 #if PETSC_USE_COMPLEX 85 ierr = PetscMalloc(sizeof(PetscReal)*ngmres->lwork,&ngmres->rwork); 86 #endif 87 ierr = PetscMalloc(sizeof(PetscScalar)*ngmres->lwork,&ngmres->work); 88 89 ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->xdot);CHKERRQ(ierr); 90 ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->rdot);CHKERRQ(ierr); 91 ierr = SNESDefaultGetWork(snes, 3);CHKERRQ(ierr); 92 PetscFunctionReturn(0); 93 } 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "SNESSetFromOptions_NGMRES" 97 PetscErrorCode SNESSetFromOptions_NGMRES(SNES snes) 98 { 99 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 ierr = PetscOptionsHead("SNES NGMRES options");CHKERRQ(ierr); 104 ierr = PetscOptionsInt("-snes_ngmres_m", "Number of directions", "SNES", ngmres->msize, &ngmres->msize, PETSC_NULL);CHKERRQ(ierr); 105 ierr = PetscOptionsInt("-snes_ngmres_restart", "Maximum iterations before restart.", "SNES", ngmres->k_rmax, &ngmres->k_rmax, PETSC_NULL);CHKERRQ(ierr); 106 ierr = PetscOptionsBool("-snes_ngmres_debug", "Debugging output for NGMRES", "SNES", ngmres->debug, &ngmres->debug, PETSC_NULL);CHKERRQ(ierr); 107 ierr = PetscOptionsReal("-snes_ngmres_gammaA", "Residual selection constant", "SNES", ngmres->gammaA, &ngmres->gammaA, PETSC_NULL);CHKERRQ(ierr); 108 ierr = PetscOptionsReal("-snes_ngmres_gammaC", "Residual restart constant", "SNES", ngmres->gammaC, &ngmres->gammaC, PETSC_NULL);CHKERRQ(ierr); 109 ierr = PetscOptionsReal("-snes_ngmres_epsilonB", "Difference selection constant", "SNES", ngmres->epsilonB, &ngmres->epsilonB, PETSC_NULL);CHKERRQ(ierr); 110 ierr = PetscOptionsReal("-snes_ngmres_deltaB", "Difference residual selection constant", "SNES", ngmres->deltaB, &ngmres->deltaB, PETSC_NULL);CHKERRQ(ierr); 111 ierr = PetscOptionsTail();CHKERRQ(ierr); 112 if ((ngmres->gammaA > ngmres->gammaC) && (ngmres->gammaC > 2.)) ngmres->gammaC = ngmres->gammaA; 113 PetscFunctionReturn(0); 114 } 115 116 #undef __FUNCT__ 117 #define __FUNCT__ "SNESView_NGMRES" 118 PetscErrorCode SNESView_NGMRES(SNES snes, PetscViewer viewer) 119 { 120 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 121 PetscBool iascii; 122 PetscErrorCode ierr; 123 124 PetscFunctionBegin; 125 ierr = PetscTypeCompare((PetscObject) viewer, PETSCVIEWERASCII, &iascii);CHKERRQ(ierr); 126 if (iascii) { 127 ierr = PetscViewerASCIIPrintf(viewer, " Size of space %d\n", ngmres->msize);CHKERRQ(ierr); 128 } 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "SNESSolve_NGMRES" 135 136 PetscErrorCode SNESSolve_NGMRES(SNES snes) 137 { 138 SNES pc; 139 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 140 141 142 143 //present solution, residual, and preconditioned residual 144 Vec x, r, f, b, d; 145 Vec x_A, r_A; 146 147 //previous iterations to construct the subspace 148 Vec *rdot = ngmres->rdot; 149 Vec *xdot = ngmres->xdot; 150 151 //coefficients and RHS to the minimization problem 152 PetscScalar *beta = ngmres->beta; 153 PetscScalar *xi = ngmres->xi; 154 PetscReal r_norm, r_A_norm; 155 PetscReal nu; 156 PetscScalar alph_total = 0.; 157 PetscScalar qentry; 158 PetscInt i, j, k, k_restart, l, ivec; 159 160 //solution selection data 161 PetscBool selectA, selectRestart; 162 PetscReal d_norm, d_min_norm, d_cur_norm; 163 PetscReal r_min_norm; 164 165 PetscErrorCode ierr; 166 167 PetscFunctionBegin; 168 //variable initialization 169 snes->reason = SNES_CONVERGED_ITERATING; 170 x = snes->vec_sol; 171 r = snes->vec_func; 172 f = snes->work[0]; 173 b = snes->vec_rhs; 174 x_A = snes->vec_sol_update; 175 r_A = snes->work[1]; 176 d = snes->work[2]; 177 ierr = VecDuplicate(x,&r);CHKERRQ(ierr); 178 179 ierr = SNESGetPC(snes, &pc);CHKERRQ(ierr); 180 ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); 181 snes->iter = 0; 182 snes->norm = 0.; 183 ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); 184 185 //initialization -- 186 187 //r = F(u) 188 ierr = SNESComputeFunction(snes, x, r);CHKERRQ(ierr); /* r = F(x) */ 189 if (snes->domainerror) { 190 snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; 191 PetscFunctionReturn(0); 192 } 193 194 //nu = (r, r); 195 ierr = VecNorm(r, NORM_2, &r_norm);CHKERRQ(ierr); 196 r_min_norm = r_norm; 197 nu = r_norm*r_norm; 198 if (PetscIsInfOrNanReal(r_norm)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_FP, "Infinite or not-a-number generated in norm"); 199 200 //q_{11} = nu 201 202 Q(0,0) = nu; 203 ngmres->r_norms[0] = r_norm; 204 //rdot[0] = r 205 ierr = VecCopy(x, xdot[0]);CHKERRQ(ierr); 206 ierr = VecCopy(r, rdot[0]);CHKERRQ(ierr); 207 208 ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); 209 snes->norm = r_norm; 210 ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); 211 SNESLogConvHistory(snes, r_norm, 0); 212 ierr = SNESMonitor(snes, 0, r_norm);CHKERRQ(ierr); 213 ierr = (*snes->ops->converged)(snes,0,0.0,0.0,r_norm,&snes->reason,snes->cnvP);CHKERRQ(ierr); 214 if (snes->reason) PetscFunctionReturn(0); 215 216 k_restart = 1; 217 l = 1; 218 for (k=1; k<snes->max_its; k++) { 219 220 // 221 222 //select which vector of the stored subspace will be updated 223 ivec = k_restart % ngmres->msize; //replace the last used part of the subspace 224 225 226 //Computation of x^M 227 ierr = SNESSolve(pc, b, x);CHKERRQ(ierr); 228 //r = F(x) 229 ierr = SNESComputeFunction(snes, x, r);CHKERRQ(ierr); 230 ierr = VecNorm(r, NORM_2, &r_norm);CHKERRQ(ierr); 231 //nu = (r, r) 232 ngmres->r_norms[ivec] = r_norm; 233 nu = r_norm*r_norm; 234 if (r_min_norm > r_norm) r_min_norm = r_norm; //the minimum norm is now of r^M 235 236 //construct the right hand side and xi factors 237 for (i = 0; i < l; i++) { 238 VecDot(rdot[i], r, &xi[i]); 239 beta[i] = nu - xi[i]; 240 } 241 242 //construct h 243 for (j = 0; j < l; j++) { 244 for (i = 0; i < l; i++) { 245 H(i, j) = Q(i, j) - xi[i] - xi[j] + nu; 246 } 247 } 248 #ifdef PETSC_MISSING_LAPACK_GELSS 249 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "NGMRES with LS requires the LAPACK GELSS routine."); 250 #else 251 ngmres->m = PetscBLASIntCast(l); 252 ngmres->n = PetscBLASIntCast(l); 253 ngmres->info = PetscBLASIntCast(0); 254 ngmres->rcond = -1.; 255 ngmres->nrhs = 1; 256 #ifdef PETSC_USE_COMPLEX 257 LAPACKgelss_(&ngmres->m, 258 &ngmres->n, 259 &ngmres->nrhs, 260 ngmres->h, 261 &ngmres->lda, 262 ngmres->beta, 263 &ngmres->ldb, 264 ngmres->s, 265 &ngmres->rcond, 266 &ngmres->rank, 267 ngmres->work, 268 &ngmres->lwork, 269 ngmres->rwork, 270 &ngmres->info); 271 #else 272 LAPACKgelss_(&ngmres->m, 273 &ngmres->n, 274 &ngmres->nrhs, 275 ngmres->h, 276 &ngmres->lda, 277 ngmres->beta, 278 &ngmres->ldb, 279 ngmres->s, 280 &ngmres->rcond, 281 &ngmres->rank, 282 ngmres->work, 283 &ngmres->lwork, 284 &ngmres->info); 285 #endif 286 if (ngmres->info < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to GELSS"); 287 if (ngmres->info > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"SVD failed to converge"); 288 #endif 289 290 alph_total = 0.; 291 for (i = 0; i < l; i++) { 292 alph_total += beta[i]; 293 } 294 ierr = VecCopy(x, x_A);CHKERRQ(ierr); 295 ierr = VecScale(x_A, 1. - alph_total);CHKERRQ(ierr); 296 297 for(i=0;i<l;i++){ 298 ierr= VecAXPY(x_A, beta[i], xdot[i]);CHKERRQ(ierr); 299 } 300 ierr = SNESComputeFunction(snes, x_A, r_A);CHKERRQ(ierr); 301 ierr = VecNorm(r_A, NORM_2, &r_A_norm);CHKERRQ(ierr); 302 303 selectA = PETSC_TRUE; 304 //Conditions for choosing the accelerated answer -- 305 306 //Criterion A -- the norm of the function isn't increased above the minimum by too much 307 if (r_A_norm >= ngmres->gammaA*r_min_norm) { 308 selectA = PETSC_FALSE; 309 } 310 311 //Criterion B -- the choice of x^A isn't too close to some other choice 312 ierr=VecCopy(x_A,d);CHKERRQ(ierr); 313 ierr=VecAXPY(d,-1,x);CHKERRQ(ierr); 314 ierr=VecNorm(d,NORM_2,&d_norm);CHKERRQ(ierr); 315 d_min_norm=10000000; 316 for(i=0;i<l;i++) { 317 ierr=VecCopy(x_A,d);CHKERRQ(ierr); 318 ierr=VecAXPY(d,-1,xdot[i]);CHKERRQ(ierr); 319 ierr=VecNorm(d,NORM_2,&d_cur_norm);CHKERRQ(ierr); 320 if(d_cur_norm<d_min_norm) d_min_norm=d_cur_norm; 321 } 322 if (ngmres->epsilonB*d_norm<d_min_norm || sqrt(r_norm)<ngmres->deltaB*sqrt(r_min_norm)) { 323 } else { 324 selectA=PETSC_FALSE; 325 } 326 327 328 if (selectA) { 329 if (ngmres->debug) 330 PetscPrintf(PETSC_COMM_WORLD, "picked r_A, ||r_A||_2 = %e, ||r_M||_2 = %e\n", r_A_norm, r_norm); 331 //copy it over 332 r_norm = r_A_norm; 333 nu = r_norm*r_norm; 334 ierr = VecCopy(r_A, r);CHKERRQ(ierr); 335 ierr = VecCopy(x_A, x);CHKERRQ(ierr); 336 } else { 337 if(ngmres->debug) 338 PetscPrintf(PETSC_COMM_WORLD, "picked r_M, ||r_A||_2 = %e, ||r_M||_2 = %e\n", r_A_norm, r_norm); 339 } 340 341 selectRestart = PETSC_FALSE; 342 343 //maximum iteration criterion 344 if (k_restart > ngmres->k_rmax) { 345 selectRestart = PETSC_TRUE; 346 } 347 348 //difference stagnation restart 349 if ((ngmres->epsilonB*d_norm > d_min_norm) && 350 (sqrt(r_A_norm) > ngmres->deltaB*sqrt(r_min_norm))) { 351 if (ngmres->debug) 352 PetscPrintf(PETSC_COMM_WORLD, "difference restart: %e > %e\n", ngmres->epsilonB*d_norm, d_min_norm); 353 selectRestart = PETSC_TRUE; 354 } 355 356 // residual stagnation restart 357 if (sqrt(r_A_norm) > ngmres->gammaC*sqrt(r_min_norm)) { 358 if (ngmres->debug) 359 PetscPrintf(PETSC_COMM_WORLD, "residual restart: %e > %e\n", sqrt(r_A_norm), ngmres->gammaC*sqrt(r_min_norm)); 360 selectRestart = PETSC_TRUE; 361 } 362 363 if (selectRestart) { 364 if (ngmres->debug) 365 PetscPrintf(PETSC_COMM_WORLD, "Restarted at iteration %d\n", k_restart); 366 k_restart = 1; 367 l = 1; 368 //q_{11} = nu 369 ngmres->r_norms[0] = r_norm; 370 nu = r_norm*r_norm; 371 Q(0,0) = nu; 372 //rdot[0] = r 373 ierr = VecCopy(x, xdot[0]);CHKERRQ(ierr); 374 ierr = VecCopy(r, rdot[0]);CHKERRQ(ierr); 375 } else { 376 //select the current size of the subspace 377 if (l < ngmres->msize) { 378 l++; 379 } 380 k_restart++; 381 //place the current entry in the list of previous entries 382 ierr = VecCopy(r, rdot[ivec]);CHKERRQ(ierr); 383 ierr = VecCopy(x, xdot[ivec]);CHKERRQ(ierr); 384 ngmres->r_norms[ivec] = r_norm; 385 if (r_min_norm > r_norm) r_min_norm = r_norm; //the minimum norm is now of r^A 386 for (i = 0; i < l; i++) { 387 VecDot(r, rdot[i], &qentry); 388 Q(i, ivec) = qentry; 389 Q(ivec, i) = qentry; 390 } 391 } 392 393 SNESLogConvHistory(snes, r_norm, k); 394 ierr = SNESMonitor(snes, k, r_norm);CHKERRQ(ierr); 395 396 snes->iter =k; 397 ierr = (*snes->ops->converged)(snes,snes->iter,0.0,0.0,r_norm,&snes->reason,snes->cnvP);CHKERRQ(ierr); 398 if (snes->reason) PetscFunctionReturn(0); 399 } 400 snes->reason = SNES_DIVERGED_MAX_IT; 401 PetscFunctionReturn(0); 402 } 403 404 405 406 EXTERN_C_BEGIN 407 #undef __FUNCT__ 408 #define __FUNCT__ "SNESCreate_NGMRES" 409 PetscErrorCode SNESCreate_NGMRES(SNES snes) 410 { 411 SNES_NGMRES *ngmres; 412 PetscErrorCode ierr; 413 414 PetscFunctionBegin; 415 snes->ops->destroy = SNESDestroy_NGMRES; 416 snes->ops->setup = SNESSetUp_NGMRES; 417 snes->ops->setfromoptions = SNESSetFromOptions_NGMRES; 418 snes->ops->view = SNESView_NGMRES; 419 snes->ops->solve = SNESSolve_NGMRES; 420 snes->ops->reset = SNESReset_NGMRES; 421 422 ierr = PetscNewLog(snes, SNES_NGMRES, &ngmres);CHKERRQ(ierr); 423 snes->data = (void*) ngmres; 424 ngmres->msize = 10; 425 ngmres->debug = PETSC_FALSE; 426 427 ngmres->gammaA = 2.; 428 ngmres->gammaC = 2.; 429 ngmres->deltaB = 0.99; 430 ngmres->epsilonB = 0.01; 431 ngmres->k_rmax = 100; 432 433 ierr = SNESGetPC(snes, &snes->pc);CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 EXTERN_C_END 437