1 /* Defines the basic SNES object */ 2 #include <../src/snes/impls/ngmres/snesngmres.h> 3 #include <petscblaslapack.h> 4 5 6 /*MC 7 SNESNGMRES - The Nonlinear Generalized Minimum Residual (NGMRES) method of Oosterlee and Washio. 8 9 Level: beginner 10 11 "Krylov Subspace Acceleration of Nonlinear Multigrid with Application to Recirculating Flows", C. W. Oosterlee and T. Washio, 12 SIAM Journal on Scientific Computing, 21(5), 2000. 13 14 .seealso: SNESCreate(), SNES, SNESSetType(), SNESType (for list of available types) 15 M*/ 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "SNESReset_NGMRES" 19 PetscErrorCode SNESReset_NGMRES(SNES snes) 20 { 21 SNES_NGMRES *ngmres = (SNES_NGMRES*) snes->data; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 ierr = VecDestroyVecs(ngmres->msize, &ngmres->rdot);CHKERRQ(ierr); 26 ierr = VecDestroyVecs(ngmres->msize, &ngmres->xdot);CHKERRQ(ierr); 27 PetscFunctionReturn(0); 28 } 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "SNESDestroy_NGMRES" 32 PetscErrorCode SNESDestroy_NGMRES(SNES snes) 33 { 34 PetscErrorCode ierr; 35 36 PetscFunctionBegin; 37 ierr = SNESReset_NGMRES(snes);CHKERRQ(ierr); 38 if (snes->work) {ierr = VecDestroyVecs(snes->nwork, &snes->work);CHKERRQ(ierr);} 39 if (snes->data) { 40 SNES_NGMRES * ngmres = (SNES_NGMRES *)snes->data; 41 ierr = PetscFree5(ngmres->h, ngmres->beta, ngmres->xi, ngmres->r_norms, ngmres->q);CHKERRQ(ierr); 42 ierr = PetscFree(ngmres->s);CHKERRQ(ierr); 43 #if PETSC_USE_COMPLEX 44 ierr = PetscFree(ngmres->rwork); 45 #endif 46 ierr = PetscFree(ngmres->work); 47 } 48 ierr = PetscFree(snes->data); 49 PetscFunctionReturn(0); 50 } 51 52 #undef __FUNCT__ 53 #define __FUNCT__ "SNESSetUp_NGMRES" 54 PetscErrorCode SNESSetUp_NGMRES(SNES snes) 55 { 56 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 57 PetscInt msize,hsize; 58 PetscErrorCode ierr; 59 60 PetscFunctionBegin; 61 msize = ngmres->msize; /* restart size */ 62 hsize = msize * msize; 63 64 65 //explicit least squares minimization solve 66 ierr = PetscMalloc5(hsize,PetscScalar,&ngmres->h, 67 msize,PetscScalar,&ngmres->beta, 68 msize,PetscScalar,&ngmres->xi, 69 msize,PetscReal,&ngmres->r_norms, 70 hsize,PetscScalar,&ngmres->q);CHKERRQ(ierr); 71 ngmres->nrhs = 1; 72 ngmres->lda = msize; 73 ngmres->ldb = msize; 74 ierr = PetscMalloc(msize*sizeof(PetscScalar),&ngmres->s);CHKERRQ(ierr); 75 76 ierr = PetscMemzero(ngmres->h,hsize*sizeof(PetscScalar));CHKERRQ(ierr); 77 ierr = PetscMemzero(ngmres->q,hsize*sizeof(PetscScalar));CHKERRQ(ierr); 78 ierr = PetscMemzero(ngmres->xi,msize*sizeof(PetscScalar));CHKERRQ(ierr); 79 ierr = PetscMemzero(ngmres->beta,msize*sizeof(PetscScalar));CHKERRQ(ierr); 80 81 ngmres->lwork = 12*msize; 82 #if PETSC_USE_COMPLEX 83 ierr = PetscMalloc(sizeof(PetscReal)*ngmres->lwork,&ngmres->rwork); 84 #endif 85 ierr = PetscMalloc(sizeof(PetscScalar)*ngmres->lwork,&ngmres->work); 86 87 ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->xdot);CHKERRQ(ierr); 88 ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->rdot);CHKERRQ(ierr); 89 ierr = SNESDefaultGetWork(snes, 3);CHKERRQ(ierr); 90 PetscFunctionReturn(0); 91 } 92 93 #undef __FUNCT__ 94 #define __FUNCT__ "SNESSetFromOptions_NGMRES" 95 PetscErrorCode SNESSetFromOptions_NGMRES(SNES snes) 96 { 97 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 98 PetscErrorCode ierr; 99 100 PetscFunctionBegin; 101 ierr = PetscOptionsHead("SNES NGMRES options");CHKERRQ(ierr); 102 ierr = PetscOptionsInt("-snes_ngmres_m", "Number of directions", "SNES", ngmres->msize, &ngmres->msize, PETSC_NULL);CHKERRQ(ierr); 103 ierr = PetscOptionsInt("-snes_ngmres_restart", "Maximum iterations before restart.", "SNES", ngmres->k_rmax, &ngmres->k_rmax, PETSC_NULL);CHKERRQ(ierr); 104 ierr = PetscOptionsBool("-snes_ngmres_debug", "Debugging output for NGMRES", "SNES", ngmres->debug, &ngmres->debug, PETSC_NULL);CHKERRQ(ierr); 105 ierr = PetscOptionsReal("-snes_ngmres_gammaA", "Residual selection constant", "SNES", ngmres->gammaA, &ngmres->gammaA, PETSC_NULL);CHKERRQ(ierr); 106 ierr = PetscOptionsReal("-snes_ngmres_gammaC", "Residual restart constant", "SNES", ngmres->gammaC, &ngmres->gammaC, PETSC_NULL);CHKERRQ(ierr); 107 ierr = PetscOptionsReal("-snes_ngmres_epsilonB", "Difference selection constant", "SNES", ngmres->epsilonB, &ngmres->epsilonB, PETSC_NULL);CHKERRQ(ierr); 108 ierr = PetscOptionsReal("-snes_ngmres_deltaB", "Difference residual selection constant", "SNES", ngmres->deltaB, &ngmres->deltaB, PETSC_NULL);CHKERRQ(ierr); 109 ierr = PetscOptionsTail();CHKERRQ(ierr); 110 if ((ngmres->gammaA > ngmres->gammaC) && (ngmres->gammaC > 2.)) ngmres->gammaC = ngmres->gammaA; 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "SNESView_NGMRES" 116 PetscErrorCode SNESView_NGMRES(SNES snes, PetscViewer viewer) 117 { 118 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 119 PetscBool iascii; 120 PetscErrorCode ierr; 121 122 PetscFunctionBegin; 123 ierr = PetscTypeCompare((PetscObject) viewer, PETSCVIEWERASCII, &iascii);CHKERRQ(ierr); 124 if (iascii) { 125 ierr = PetscViewerASCIIPrintf(viewer, " Size of space %d\n", ngmres->msize);CHKERRQ(ierr); 126 ierr = PetscViewerASCIIPrintf(viewer, " Maximum iterations before restart %d\n", ngmres->k_rmax);CHKERRQ(ierr); 127 } 128 PetscFunctionReturn(0); 129 } 130 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "SNESSolve_NGMRES" 134 135 PetscErrorCode SNESSolve_NGMRES(SNES snes) 136 { 137 SNES pc; 138 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 139 140 141 142 //present solution, residual, and preconditioned residual 143 Vec x, r, f, b, d; 144 Vec x_A, r_A; 145 146 //previous iterations to construct the subspace 147 Vec *rdot = ngmres->rdot; 148 Vec *xdot = ngmres->xdot; 149 150 //coefficients and RHS to the minimization problem 151 PetscScalar *beta = ngmres->beta; 152 PetscScalar *xi = ngmres->xi; 153 PetscReal r_norm, r_A_norm; 154 PetscReal nu; 155 PetscScalar alph_total = 0.; 156 PetscScalar qentry; 157 PetscInt i, j, k, k_restart, l, ivec; 158 159 //solution selection data 160 PetscBool selectA, selectRestart; 161 PetscReal d_norm, d_min_norm, d_cur_norm; 162 PetscReal r_min_norm; 163 164 PetscErrorCode ierr; 165 166 PetscFunctionBegin; 167 //variable initialization 168 snes->reason = SNES_CONVERGED_ITERATING; 169 x = snes->vec_sol; 170 r = snes->vec_func; 171 f = snes->work[0]; 172 b = snes->vec_rhs; 173 x_A = snes->vec_sol_update; 174 r_A = snes->work[1]; 175 d = snes->work[2]; 176 ierr = VecDuplicate(x,&r);CHKERRQ(ierr); 177 178 ierr = SNESGetPC(snes, &pc);CHKERRQ(ierr); 179 ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); 180 snes->iter = 0; 181 snes->norm = 0.; 182 ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); 183 184 //initialization -- 185 186 //r = F(u) 187 ierr = SNESComputeFunction(snes, x, r);CHKERRQ(ierr); /* r = F(x) */ 188 if (snes->domainerror) { 189 snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; 190 PetscFunctionReturn(0); 191 } 192 193 //nu = (r, r); 194 ierr = VecNorm(r, NORM_2, &r_norm);CHKERRQ(ierr); 195 r_min_norm = r_norm; 196 nu = r_norm*r_norm; 197 if (PetscIsInfOrNanReal(r_norm)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_FP, "Infinite or not-a-number generated in norm"); 198 199 //q_{11} = nu 200 201 Q(0,0) = nu; 202 ngmres->r_norms[0] = r_norm; 203 //rdot[0] = r 204 ierr = VecCopy(x, xdot[0]);CHKERRQ(ierr); 205 ierr = VecCopy(r, rdot[0]);CHKERRQ(ierr); 206 207 ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); 208 snes->norm = r_norm; 209 ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); 210 SNESLogConvHistory(snes, r_norm, 0); 211 ierr = SNESMonitor(snes, 0, r_norm);CHKERRQ(ierr); 212 ierr = (*snes->ops->converged)(snes,0,0.0,0.0,r_norm,&snes->reason,snes->cnvP);CHKERRQ(ierr); 213 if (snes->reason) PetscFunctionReturn(0); 214 215 k_restart = 1; 216 l = 1; 217 for (k=1; k<snes->max_its; k++) { 218 219 // 220 221 //select which vector of the stored subspace will be updated 222 ivec = k_restart % ngmres->msize; //replace the last used part of the subspace 223 224 225 //Computation of x^M 226 ierr = SNESSolve(pc, b, x);CHKERRQ(ierr); 227 //r = F(x) 228 ierr = SNESComputeFunction(snes, x, r);CHKERRQ(ierr); 229 ierr = VecNorm(r, NORM_2, &r_norm);CHKERRQ(ierr); 230 //nu = (r, r) 231 ngmres->r_norms[ivec] = r_norm; 232 nu = r_norm*r_norm; 233 if (r_min_norm > r_norm) r_min_norm = r_norm; //the minimum norm is now of r^M 234 235 //construct the right hand side and xi factors 236 for (i = 0; i < l; i++) { 237 VecDot(rdot[i], r, &xi[i]); 238 beta[i] = nu - xi[i]; 239 } 240 241 //construct h 242 for (j = 0; j < l; j++) { 243 for (i = 0; i < l; i++) { 244 H(i, j) = Q(i, j) - xi[i] - xi[j] + nu; 245 } 246 } 247 #ifdef PETSC_MISSING_LAPACK_GELSS 248 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "NGMRES with LS requires the LAPACK GELSS routine."); 249 #else 250 ngmres->m = PetscBLASIntCast(l); 251 ngmres->n = PetscBLASIntCast(l); 252 ngmres->info = PetscBLASIntCast(0); 253 ngmres->rcond = -1.; 254 ngmres->nrhs = 1; 255 #ifdef PETSC_USE_COMPLEX 256 LAPACKgelss_(&ngmres->m, 257 &ngmres->n, 258 &ngmres->nrhs, 259 ngmres->h, 260 &ngmres->lda, 261 ngmres->beta, 262 &ngmres->ldb, 263 ngmres->s, 264 &ngmres->rcond, 265 &ngmres->rank, 266 ngmres->work, 267 &ngmres->lwork, 268 ngmres->rwork, 269 &ngmres->info); 270 #else 271 LAPACKgelss_(&ngmres->m, 272 &ngmres->n, 273 &ngmres->nrhs, 274 ngmres->h, 275 &ngmres->lda, 276 ngmres->beta, 277 &ngmres->ldb, 278 ngmres->s, 279 &ngmres->rcond, 280 &ngmres->rank, 281 ngmres->work, 282 &ngmres->lwork, 283 &ngmres->info); 284 #endif 285 if (ngmres->info < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to GELSS"); 286 if (ngmres->info > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"SVD failed to converge"); 287 #endif 288 289 alph_total = 0.; 290 for (i = 0; i < l; i++) { 291 alph_total += beta[i]; 292 } 293 ierr = VecCopy(x, x_A);CHKERRQ(ierr); 294 ierr = VecScale(x_A, 1. - alph_total);CHKERRQ(ierr); 295 296 for(i=0;i<l;i++){ 297 ierr= VecAXPY(x_A, beta[i], xdot[i]);CHKERRQ(ierr); 298 } 299 ierr = SNESComputeFunction(snes, x_A, r_A);CHKERRQ(ierr); 300 ierr = VecNorm(r_A, NORM_2, &r_A_norm);CHKERRQ(ierr); 301 302 selectA = PETSC_TRUE; 303 //Conditions for choosing the accelerated answer -- 304 305 //Criterion A -- the norm of the function isn't increased above the minimum by too much 306 if (r_A_norm >= ngmres->gammaA*r_min_norm) { 307 selectA = PETSC_FALSE; 308 } 309 310 //Criterion B -- the choice of x^A isn't too close to some other choice 311 ierr=VecCopy(x_A,d);CHKERRQ(ierr); 312 ierr=VecAXPY(d,-1,x);CHKERRQ(ierr); 313 ierr=VecNorm(d,NORM_2,&d_norm);CHKERRQ(ierr); 314 d_min_norm=10000000; 315 for(i=0;i<l;i++) { 316 ierr=VecCopy(x_A,d);CHKERRQ(ierr); 317 ierr=VecAXPY(d,-1,xdot[i]);CHKERRQ(ierr); 318 ierr=VecNorm(d,NORM_2,&d_cur_norm);CHKERRQ(ierr); 319 if(d_cur_norm<d_min_norm) d_min_norm=d_cur_norm; 320 } 321 if (ngmres->epsilonB*d_norm<d_min_norm || sqrt(r_norm)<ngmres->deltaB*sqrt(r_min_norm)) { 322 } else { 323 selectA=PETSC_FALSE; 324 } 325 326 327 if (selectA) { 328 if (ngmres->debug) 329 PetscPrintf(PETSC_COMM_WORLD, "picked r_A, ||r_A||_2 = %e, ||r_M||_2 = %e\n", r_A_norm, r_norm); 330 //copy it over 331 r_norm = r_A_norm; 332 nu = r_norm*r_norm; 333 ierr = VecCopy(r_A, r);CHKERRQ(ierr); 334 ierr = VecCopy(x_A, x);CHKERRQ(ierr); 335 } else { 336 if(ngmres->debug) 337 PetscPrintf(PETSC_COMM_WORLD, "picked r_M, ||r_A||_2 = %e, ||r_M||_2 = %e\n", r_A_norm, r_norm); 338 } 339 340 selectRestart = PETSC_FALSE; 341 342 //maximum iteration criterion 343 if (k_restart > ngmres->k_rmax) { 344 selectRestart = PETSC_TRUE; 345 } 346 347 //difference stagnation restart 348 if ((ngmres->epsilonB*d_norm > d_min_norm) && 349 (sqrt(r_A_norm) > ngmres->deltaB*sqrt(r_min_norm))) { 350 if (ngmres->debug) 351 PetscPrintf(PETSC_COMM_WORLD, "difference restart: %e > %e\n", ngmres->epsilonB*d_norm, d_min_norm); 352 selectRestart = PETSC_TRUE; 353 } 354 355 // residual stagnation restart 356 if (sqrt(r_A_norm) > ngmres->gammaC*sqrt(r_min_norm)) { 357 if (ngmres->debug) 358 PetscPrintf(PETSC_COMM_WORLD, "residual restart: %e > %e\n", sqrt(r_A_norm), ngmres->gammaC*sqrt(r_min_norm)); 359 selectRestart = PETSC_TRUE; 360 } 361 362 if (selectRestart) { 363 if (ngmres->debug) 364 PetscPrintf(PETSC_COMM_WORLD, "Restarted at iteration %d\n", k_restart); 365 k_restart = 1; 366 l = 1; 367 //q_{11} = nu 368 ngmres->r_norms[0] = r_norm; 369 nu = r_norm*r_norm; 370 Q(0,0) = nu; 371 //rdot[0] = r 372 ierr = VecCopy(x, xdot[0]);CHKERRQ(ierr); 373 ierr = VecCopy(r, rdot[0]);CHKERRQ(ierr); 374 } else { 375 //select the current size of the subspace 376 if (l < ngmres->msize) { 377 l++; 378 } 379 k_restart++; 380 //place the current entry in the list of previous entries 381 ierr = VecCopy(r, rdot[ivec]);CHKERRQ(ierr); 382 ierr = VecCopy(x, xdot[ivec]);CHKERRQ(ierr); 383 ngmres->r_norms[ivec] = r_norm; 384 if (r_min_norm > r_norm) r_min_norm = r_norm; //the minimum norm is now of r^A 385 for (i = 0; i < l; i++) { 386 VecDot(r, rdot[i], &qentry); 387 Q(i, ivec) = qentry; 388 Q(ivec, i) = qentry; 389 } 390 } 391 392 SNESLogConvHistory(snes, r_norm, k); 393 ierr = SNESMonitor(snes, k, r_norm);CHKERRQ(ierr); 394 395 snes->iter =k; 396 ierr = (*snes->ops->converged)(snes,snes->iter,0.0,0.0,r_norm,&snes->reason,snes->cnvP);CHKERRQ(ierr); 397 if (snes->reason) PetscFunctionReturn(0); 398 } 399 snes->reason = SNES_DIVERGED_MAX_IT; 400 PetscFunctionReturn(0); 401 } 402 403 404 405 EXTERN_C_BEGIN 406 #undef __FUNCT__ 407 #define __FUNCT__ "SNESCreate_NGMRES" 408 PetscErrorCode SNESCreate_NGMRES(SNES snes) 409 { 410 SNES_NGMRES *ngmres; 411 PetscErrorCode ierr; 412 413 PetscFunctionBegin; 414 snes->ops->destroy = SNESDestroy_NGMRES; 415 snes->ops->setup = SNESSetUp_NGMRES; 416 snes->ops->setfromoptions = SNESSetFromOptions_NGMRES; 417 snes->ops->view = SNESView_NGMRES; 418 snes->ops->solve = SNESSolve_NGMRES; 419 snes->ops->reset = SNESReset_NGMRES; 420 421 ierr = PetscNewLog(snes, SNES_NGMRES, &ngmres);CHKERRQ(ierr); 422 snes->data = (void*) ngmres; 423 ngmres->msize = 10; 424 ngmres->debug = PETSC_FALSE; 425 426 ngmres->gammaA = 2.; 427 ngmres->gammaC = 2.; 428 ngmres->deltaB = 0.9; 429 ngmres->epsilonB = 0.1; 430 ngmres->k_rmax = 200; 431 432 ierr = SNESGetPC(snes, &snes->pc);CHKERRQ(ierr); 433 PetscFunctionReturn(0); 434 } 435 EXTERN_C_END 436