1 /* Defines the basic SNES object */ 2 #include <../src/snes/impls/ngmres/snesngmres.h> 3 #include <petscblaslapack.h> 4 5 6 /*MC 7 SNESNGMRES - The Nonlinear Generalized Minimum Residual (NGMRES) method of Oosterlee and Washio. 8 9 Level: beginner 10 11 Notes: Supports only left preconditioning 12 13 "Krylov Subspace Acceleration of Nonlinear Multigrid with Application to Recirculating Flows", C. W. Oosterlee and T. Washio, 14 SIAM Journal on Scientific Computing, 21(5), 2000. 15 16 .seealso: SNESCreate(), SNES, SNESSetType(), SNESType (for list of available types) 17 M*/ 18 19 #undef __FUNCT__ 20 #define __FUNCT__ "SNESReset_NGMRES" 21 PetscErrorCode SNESReset_NGMRES(SNES snes) 22 { 23 SNES_NGMRES *ngmres = (SNES_NGMRES*) snes->data; 24 PetscErrorCode ierr; 25 26 PetscFunctionBegin; 27 ierr = VecDestroyVecs(ngmres->msize, &ngmres->rdot);CHKERRQ(ierr); 28 ierr = VecDestroyVecs(ngmres->msize, &ngmres->xdot);CHKERRQ(ierr); 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "SNESDestroy_NGMRES" 34 PetscErrorCode SNESDestroy_NGMRES(SNES snes) 35 { 36 PetscErrorCode ierr; 37 38 PetscFunctionBegin; 39 ierr = SNESReset_NGMRES(snes);CHKERRQ(ierr); 40 if (snes->work) {ierr = VecDestroyVecs(snes->nwork, &snes->work);CHKERRQ(ierr);} 41 if (snes->data) { 42 SNES_NGMRES * ngmres = (SNES_NGMRES *)snes->data; 43 ierr = PetscFree4(ngmres->h, ngmres->beta, ngmres->xi, ngmres->q);CHKERRQ(ierr); 44 ierr = PetscFree(ngmres->s);CHKERRQ(ierr); 45 #if PETSC_USE_COMPLEX 46 ierr = PetscFree(ngmres->rwork); 47 #endif 48 ierr = PetscFree(ngmres->work); 49 } 50 ierr = PetscFree(snes->data); 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "SNESSetUp_NGMRES" 56 PetscErrorCode SNESSetUp_NGMRES(SNES snes) 57 { 58 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 59 PetscInt msize,hsize; 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 msize = ngmres->msize; /* restart size */ 64 hsize = msize * msize; 65 66 67 //explicit least squares minimization solve 68 ierr = PetscMalloc5(hsize,PetscScalar,&ngmres->h, 69 msize,PetscScalar,&ngmres->beta, 70 msize,PetscScalar,&ngmres->xi, 71 msize,PetscReal,&ngmres->r_norms, 72 hsize,PetscScalar,&ngmres->q);CHKERRQ(ierr); 73 ngmres->nrhs = 1; 74 ngmres->lda = msize; 75 ngmres->ldb = msize; 76 ierr = PetscMalloc(msize*sizeof(PetscScalar),&ngmres->s);CHKERRQ(ierr); 77 78 ierr = PetscMemzero(ngmres->h,hsize*sizeof(PetscScalar));CHKERRQ(ierr); 79 ierr = PetscMemzero(ngmres->q,hsize*sizeof(PetscScalar));CHKERRQ(ierr); 80 ierr = PetscMemzero(ngmres->xi,msize*sizeof(PetscScalar));CHKERRQ(ierr); 81 ierr = PetscMemzero(ngmres->beta,msize*sizeof(PetscScalar));CHKERRQ(ierr); 82 83 ngmres->lwork = 12*msize; 84 #if PETSC_USE_COMPLEX 85 ierr = PetscMalloc(sizeof(PetscReal)*ngmres->lwork,&ngmres->rwork); 86 #endif 87 ierr = PetscMalloc(sizeof(PetscScalar)*ngmres->lwork,&ngmres->work); 88 89 ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->xdot);CHKERRQ(ierr); 90 ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->rdot);CHKERRQ(ierr); 91 ierr = SNESDefaultGetWork(snes, 3);CHKERRQ(ierr); 92 PetscFunctionReturn(0); 93 } 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "SNESSetFromOptions_NGMRES" 97 PetscErrorCode SNESSetFromOptions_NGMRES(SNES snes) 98 { 99 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 100 PetscErrorCode ierr; 101 102 PetscFunctionBegin; 103 ierr = PetscOptionsHead("SNES NGMRES options");CHKERRQ(ierr); 104 ierr = PetscOptionsInt("-snes_ngmres_m", "Number of directions", "SNES", ngmres->msize, &ngmres->msize, PETSC_NULL);CHKERRQ(ierr); 105 ierr = PetscOptionsInt("-snes_ngmres_restart", "Maximum iterations before restart.", "SNES", ngmres->k_rmax, &ngmres->k_rmax, PETSC_NULL);CHKERRQ(ierr); 106 ierr = PetscOptionsBool("-snes_ngmres_debug", "Debugging output for NGMRES", "SNES", ngmres->debug, &ngmres->debug, PETSC_NULL);CHKERRQ(ierr); 107 ierr = PetscOptionsTail();CHKERRQ(ierr); 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "SNESView_NGMRES" 113 PetscErrorCode SNESView_NGMRES(SNES snes, PetscViewer viewer) 114 { 115 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 116 PetscBool iascii; 117 PetscErrorCode ierr; 118 119 PetscFunctionBegin; 120 ierr = PetscTypeCompare((PetscObject) viewer, PETSCVIEWERASCII, &iascii);CHKERRQ(ierr); 121 if (iascii) { 122 ierr = PetscViewerASCIIPrintf(viewer, " Size of space %d\n", ngmres->msize);CHKERRQ(ierr); 123 } 124 PetscFunctionReturn(0); 125 } 126 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "SNESSolve_NGMRES" 130 131 PetscErrorCode SNESSolve_NGMRES(SNES snes) 132 { 133 SNES pc; 134 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 135 136 137 138 //present solution, residual, and preconditioned residual 139 Vec x, r, f, b, d; 140 Vec x_A, r_A; 141 142 //previous iterations to construct the subspace 143 Vec *rdot = ngmres->rdot; 144 Vec *xdot = ngmres->xdot; 145 146 //coefficients and RHS to the minimization problem 147 PetscScalar *beta = ngmres->beta; 148 PetscScalar *xi = ngmres->xi; 149 PetscReal r_norm, r_A_norm; 150 PetscReal nu; 151 PetscScalar alph_total = 0.; 152 PetscScalar qentry; 153 PetscInt i, j, k, k_restart, l, ivec; 154 155 //solution selection data 156 PetscBool selectA, selectRestart; 157 PetscReal d_norm, d_min_norm, d_cur_norm; 158 PetscReal r_min_norm; 159 160 PetscErrorCode ierr; 161 162 PetscFunctionBegin; 163 //variable initialization 164 snes->reason = SNES_CONVERGED_ITERATING; 165 x = snes->vec_sol; 166 r = snes->vec_func; 167 f = snes->work[0]; 168 b = snes->vec_rhs; 169 x_A = snes->vec_sol_update; 170 r_A = snes->work[1]; 171 d = snes->work[2]; 172 ierr = VecDuplicate(x,&r);CHKERRQ(ierr); 173 174 ierr = SNESGetPC(snes, &pc);CHKERRQ(ierr); 175 ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); 176 snes->iter = 0; 177 snes->norm = 0.; 178 ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); 179 180 //initialization -- 181 182 //r = F(u) 183 ierr = SNESComputeFunction(snes, x, r);CHKERRQ(ierr); /* r = F(x) */ 184 if (snes->domainerror) { 185 snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; 186 PetscFunctionReturn(0); 187 } 188 189 //nu = (r, r); 190 ierr = VecNorm(r, NORM_2, &r_norm);CHKERRQ(ierr); 191 r_min_norm = r_norm; 192 nu = r_norm*r_norm; 193 if (PetscIsInfOrNanReal(r_norm)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_FP, "Infinite or not-a-number generated in norm"); 194 195 //q_{11} = nu 196 197 Q(0,0) = nu; 198 ngmres->r_norms[0] = r_norm; 199 //rdot[0] = r 200 ierr = VecCopy(x, xdot[0]);CHKERRQ(ierr); 201 ierr = VecCopy(r, rdot[0]);CHKERRQ(ierr); 202 203 ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); 204 snes->norm = r_norm; 205 ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); 206 SNESLogConvHistory(snes, r_norm, 0); 207 ierr = SNESMonitor(snes, 0, r_norm);CHKERRQ(ierr); 208 ierr = (*snes->ops->converged)(snes,0,0.0,0.0,r_norm,&snes->reason,snes->cnvP);CHKERRQ(ierr); 209 if (snes->reason) PetscFunctionReturn(0); 210 211 k_restart = 1; 212 l = 1; 213 for (k=1; k<snes->max_its; k++) { 214 215 // 216 217 //select which vector of the stored subspace will be updated 218 ivec = k_restart % ngmres->msize; //replace the last used part of the subspace 219 220 221 //Computation of x^M 222 ierr = SNESSolve(pc, b, x);CHKERRQ(ierr); 223 //r = F(x) 224 ierr = SNESComputeFunction(snes, x, r);CHKERRQ(ierr); 225 ierr = VecNorm(r, NORM_2, &r_norm);CHKERRQ(ierr); 226 //nu = (r, r) 227 ngmres->r_norms[ivec] = r_norm; 228 nu = r_norm*r_norm; 229 if (r_min_norm > r_norm) r_min_norm = r_norm; //the minimum norm is now of r^M 230 231 //construct the right hand side and xi factors 232 for (i = 0; i < l; i++) { 233 VecDot(rdot[i], r, &xi[i]); 234 beta[i] = nu - xi[i]; 235 } 236 237 //construct h 238 for (j = 0; j < l; j++) { 239 for (i = 0; i < l; i++) { 240 H(i, j) = Q(i, j) - xi[i] - xi[j] + nu; 241 } 242 } 243 #ifdef PETSC_MISSING_LAPACK_GELSS 244 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "NGMRES with LS requires the LAPACK GELSS routine."); 245 #else 246 ngmres->m = PetscBLASIntCast(l); 247 ngmres->n = PetscBLASIntCast(l); 248 ngmres->info = PetscBLASIntCast(0); 249 ngmres->rcond = -1.; 250 ngmres->nrhs = 1; 251 #ifdef PETSC_USE_COMPLEX 252 LAPACKgelss_(&ngmres->m, 253 &ngmres->n, 254 &ngmres->nrhs, 255 ngmres->h, 256 &ngmres->lda, 257 ngmres->beta, 258 &ngmres->ldb, 259 ngmres->s, 260 &ngmres->rcond, 261 &ngmres->rank, 262 ngmres->work, 263 &ngmres->lwork, 264 ngmres->rwork, 265 &ngmres->info); 266 #else 267 LAPACKgelss_(&ngmres->m, 268 &ngmres->n, 269 &ngmres->nrhs, 270 ngmres->h, 271 &ngmres->lda, 272 ngmres->beta, 273 &ngmres->ldb, 274 ngmres->s, 275 &ngmres->rcond, 276 &ngmres->rank, 277 ngmres->work, 278 &ngmres->lwork, 279 &ngmres->info); 280 #endif 281 if (ngmres->info < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to GELSS"); 282 if (ngmres->info > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"SVD failed to converge"); 283 #endif 284 285 alph_total = 0.; 286 for (i = 0; i < l; i++) { 287 alph_total += beta[i]; 288 } 289 ierr = VecCopy(x, x_A);CHKERRQ(ierr); 290 ierr = VecScale(x_A, 1. - alph_total);CHKERRQ(ierr); 291 292 for(i=0;i<l;i++){ 293 ierr= VecAXPY(x_A, beta[i], xdot[i]);CHKERRQ(ierr); 294 } 295 ierr = SNESComputeFunction(snes, x_A, r_A);CHKERRQ(ierr); 296 ierr = VecNorm(r_A, NORM_2, &r_A_norm);CHKERRQ(ierr); 297 298 selectA = PETSC_TRUE; 299 //Conditions for choosing the accelerated answer -- 300 301 //Criterion A -- the norm of the function isn't increased above the minimum by too much 302 if (r_A_norm >= ngmres->gammaA*r_min_norm) { 303 selectA = PETSC_FALSE; 304 } 305 306 //Criterion B -- the choice of x^A isn't too close to some other choice 307 ierr=VecCopy(x_A,d);CHKERRQ(ierr); 308 ierr=VecAXPY(d,-1,x);CHKERRQ(ierr); 309 ierr=VecNorm(d,NORM_2,&d_norm);CHKERRQ(ierr); 310 d_min_norm=10000000; 311 for(i=0;i<l;i++) { 312 ierr=VecCopy(x_A,d);CHKERRQ(ierr); 313 ierr=VecAXPY(d,-1,xdot[i]);CHKERRQ(ierr); 314 ierr=VecNorm(d,NORM_2,&d_cur_norm);CHKERRQ(ierr); 315 if(d_cur_norm<d_min_norm) d_min_norm=d_cur_norm; 316 } 317 if (ngmres->epsilonB*d_norm<d_min_norm || sqrt(r_norm)<ngmres->deltaB*sqrt(r_min_norm)) { 318 } else { 319 selectA=PETSC_FALSE; 320 } 321 322 323 if (selectA) { 324 if (ngmres->debug) 325 PetscPrintf(PETSC_COMM_WORLD, "picked r_A, ||r_A||_2 = %e, ||r_M||_2 = %e\n", r_A_norm, r_norm); 326 //copy it over 327 r_norm = r_A_norm; 328 nu = r_norm*r_norm; 329 ierr = VecCopy(r_A, r);CHKERRQ(ierr); 330 ierr = VecCopy(x_A, x);CHKERRQ(ierr); 331 } else { 332 if(ngmres->debug) 333 PetscPrintf(PETSC_COMM_WORLD, "picked r_M, ||r_A||_2 = %e, ||r_M||_2 = %e\n", r_A_norm, r_norm); 334 } 335 336 selectRestart = PETSC_FALSE; 337 338 //maximum iteration criterion 339 if (k_restart > ngmres->k_rmax) { 340 selectRestart = PETSC_TRUE; 341 } 342 343 //difference stagnation restart 344 if ((ngmres->epsilonB*d_norm > d_min_norm) && 345 (sqrt(r_A_norm) > ngmres->deltaB*sqrt(r_min_norm))) { 346 if (ngmres->debug) 347 PetscPrintf(PETSC_COMM_WORLD, "difference restart: %e > %e\n", ngmres->epsilonB*d_norm, d_min_norm); 348 selectRestart = PETSC_TRUE; 349 } 350 351 // residual stagnation restart 352 if (sqrt(r_A_norm) > ngmres->gammaC*sqrt(r_min_norm)) { 353 if (ngmres->debug) 354 PetscPrintf(PETSC_COMM_WORLD, "residual restart: %e > %e\n", sqrt(r_A_norm), ngmres->gammaC*sqrt(r_min_norm)); 355 selectRestart = PETSC_TRUE; 356 } 357 358 if (selectRestart) { 359 if (ngmres->debug) 360 PetscPrintf(PETSC_COMM_WORLD, "Restarted at iteration %d\n", k_restart); 361 k_restart = 1; 362 l = 1; 363 //q_{11} = nu 364 ngmres->r_norms[0] = r_norm; 365 nu = r_norm*r_norm; 366 Q(0,0) = nu; 367 //rdot[0] = r 368 ierr = VecCopy(x, xdot[0]);CHKERRQ(ierr); 369 ierr = VecCopy(r, rdot[0]);CHKERRQ(ierr); 370 } else { 371 //select the current size of the subspace 372 if (l < ngmres->msize) { 373 l++; 374 } 375 k_restart++; 376 //place the current entry in the list of previous entries 377 ierr = VecCopy(r, rdot[ivec]);CHKERRQ(ierr); 378 ierr = VecCopy(x, xdot[ivec]);CHKERRQ(ierr); 379 ngmres->r_norms[ivec] = r_norm; 380 if (r_min_norm > r_norm) r_min_norm = r_norm; //the minimum norm is now of r^A 381 for (i = 0; i < l; i++) { 382 VecDot(r, rdot[i], &qentry); 383 Q(i, ivec) = qentry; 384 Q(ivec, i) = qentry; 385 } 386 } 387 388 SNESLogConvHistory(snes, r_norm, k); 389 ierr = SNESMonitor(snes, k, r_norm);CHKERRQ(ierr); 390 391 snes->iter =k; 392 ierr = (*snes->ops->converged)(snes,snes->iter,0.0,0.0,r_norm,&snes->reason,snes->cnvP);CHKERRQ(ierr); 393 if (snes->reason) PetscFunctionReturn(0); 394 } 395 snes->reason = SNES_DIVERGED_MAX_IT; 396 PetscFunctionReturn(0); 397 } 398 399 400 401 EXTERN_C_BEGIN 402 #undef __FUNCT__ 403 #define __FUNCT__ "SNESCreate_NGMRES" 404 PetscErrorCode SNESCreate_NGMRES(SNES snes) 405 { 406 SNES_NGMRES *ngmres; 407 PetscErrorCode ierr; 408 409 PetscFunctionBegin; 410 snes->ops->destroy = SNESDestroy_NGMRES; 411 snes->ops->setup = SNESSetUp_NGMRES; 412 snes->ops->setfromoptions = SNESSetFromOptions_NGMRES; 413 snes->ops->view = SNESView_NGMRES; 414 snes->ops->solve = SNESSolve_NGMRES; 415 snes->ops->reset = SNESReset_NGMRES; 416 417 ierr = PetscNewLog(snes, SNES_NGMRES, &ngmres);CHKERRQ(ierr); 418 snes->data = (void*) ngmres; 419 ngmres->msize = 10; 420 ngmres->debug = PETSC_FALSE; 421 422 ngmres->gammaA = 2.; 423 ngmres->gammaC = 2.; 424 ngmres->deltaB = 0.99; 425 ngmres->epsilonB = 0.01; 426 ngmres->k_rmax = 100; 427 428 ierr = SNESGetPC(snes, &snes->pc);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 EXTERN_C_END 432