1 /* Defines the basic SNES object */ 2 #include <../src/snes/impls/ngmres/snesngmres.h> 3 #include <petscblaslapack.h> 4 5 6 /*MC 7 SNESNGMRES - The Nonlinear Generalized Minimum Residual (NGMRES) method of Oosterlee and Washio. 8 9 Level: beginner 10 11 "Krylov Subspace Acceleration of Nonlinear Multigrid with Application to Recirculating Flows", C. W. Oosterlee and T. Washio, 12 SIAM Journal on Scientific Computing, 21(5), 2000. 13 14 .seealso: SNESCreate(), SNES, SNESSetType(), SNESType (for list of available types) 15 M*/ 16 17 #undef __FUNCT__ 18 #define __FUNCT__ "SNESReset_NGMRES" 19 PetscErrorCode SNESReset_NGMRES(SNES snes) 20 { 21 SNES_NGMRES *ngmres = (SNES_NGMRES*) snes->data; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 ierr = VecDestroyVecs(ngmres->msize, &ngmres->rdot);CHKERRQ(ierr); 26 ierr = VecDestroyVecs(ngmres->msize, &ngmres->xdot);CHKERRQ(ierr); 27 PetscFunctionReturn(0); 28 } 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "SNESDestroy_NGMRES" 32 PetscErrorCode SNESDestroy_NGMRES(SNES snes) 33 { 34 PetscErrorCode ierr; 35 36 PetscFunctionBegin; 37 ierr = SNESReset_NGMRES(snes);CHKERRQ(ierr); 38 if (snes->work) {ierr = VecDestroyVecs(snes->nwork, &snes->work);CHKERRQ(ierr);} 39 if (snes->data) { 40 SNES_NGMRES * ngmres = (SNES_NGMRES *)snes->data; 41 ierr = PetscFree5(ngmres->h, ngmres->beta, ngmres->xi, ngmres->r_norms, ngmres->q);CHKERRQ(ierr); 42 ierr = PetscFree(ngmres->s);CHKERRQ(ierr); 43 #if PETSC_USE_COMPLEX 44 ierr = PetscFree(ngmres->rwork); 45 #endif 46 ierr = PetscFree(ngmres->work); 47 } 48 ierr = PetscFree(snes->data); 49 PetscFunctionReturn(0); 50 } 51 52 #undef __FUNCT__ 53 #define __FUNCT__ "SNESSetUp_NGMRES" 54 PetscErrorCode SNESSetUp_NGMRES(SNES snes) 55 { 56 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 57 PetscInt msize,hsize; 58 PetscErrorCode ierr; 59 60 PetscFunctionBegin; 61 msize = ngmres->msize; /* restart size */ 62 hsize = msize * msize; 63 64 65 //explicit least squares minimization solve 66 ierr = PetscMalloc5(hsize,PetscScalar,&ngmres->h, 67 msize,PetscScalar,&ngmres->beta, 68 msize,PetscScalar,&ngmres->xi, 69 msize,PetscReal,&ngmres->r_norms, 70 hsize,PetscScalar,&ngmres->q);CHKERRQ(ierr); 71 ngmres->nrhs = 1; 72 ngmres->lda = msize; 73 ngmres->ldb = msize; 74 ierr = PetscMalloc(msize*sizeof(PetscScalar),&ngmres->s);CHKERRQ(ierr); 75 76 ierr = PetscMemzero(ngmres->h,hsize*sizeof(PetscScalar));CHKERRQ(ierr); 77 ierr = PetscMemzero(ngmres->q,hsize*sizeof(PetscScalar));CHKERRQ(ierr); 78 ierr = PetscMemzero(ngmres->xi,msize*sizeof(PetscScalar));CHKERRQ(ierr); 79 ierr = PetscMemzero(ngmres->beta,msize*sizeof(PetscScalar));CHKERRQ(ierr); 80 81 ngmres->lwork = 12*msize; 82 #if PETSC_USE_COMPLEX 83 ierr = PetscMalloc(sizeof(PetscReal)*ngmres->lwork,&ngmres->rwork); 84 #endif 85 ierr = PetscMalloc(sizeof(PetscScalar)*ngmres->lwork,&ngmres->work); 86 87 ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->xdot);CHKERRQ(ierr); 88 ierr = VecDuplicateVecs(snes->vec_sol, ngmres->msize, &ngmres->rdot);CHKERRQ(ierr); 89 ierr = SNESDefaultGetWork(snes, 3);CHKERRQ(ierr); 90 PetscFunctionReturn(0); 91 } 92 93 #undef __FUNCT__ 94 #define __FUNCT__ "SNESSetFromOptions_NGMRES" 95 PetscErrorCode SNESSetFromOptions_NGMRES(SNES snes) 96 { 97 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 98 PetscErrorCode ierr; 99 100 PetscFunctionBegin; 101 ierr = PetscOptionsHead("SNES NGMRES options");CHKERRQ(ierr); 102 ierr = PetscOptionsInt("-snes_ngmres_m", "Number of directions", "SNES", ngmres->msize, &ngmres->msize, PETSC_NULL);CHKERRQ(ierr); 103 ierr = PetscOptionsInt("-snes_ngmres_restart", "Maximum iterations before restart.", "SNES", ngmres->k_rmax, &ngmres->k_rmax, PETSC_NULL);CHKERRQ(ierr); 104 ierr = PetscOptionsBool("-snes_ngmres_debug", "Debugging output for NGMRES", "SNES", ngmres->debug, &ngmres->debug, PETSC_NULL);CHKERRQ(ierr); 105 ierr = PetscOptionsReal("-snes_ngmres_gammaA", "Residual selection constant", "SNES", ngmres->gammaA, &ngmres->gammaA, PETSC_NULL);CHKERRQ(ierr); 106 ierr = PetscOptionsReal("-snes_ngmres_gammaC", "Residual restart constant", "SNES", ngmres->gammaC, &ngmres->gammaC, PETSC_NULL);CHKERRQ(ierr); 107 ierr = PetscOptionsReal("-snes_ngmres_epsilonB", "Difference selection constant", "SNES", ngmres->epsilonB, &ngmres->epsilonB, PETSC_NULL);CHKERRQ(ierr); 108 ierr = PetscOptionsReal("-snes_ngmres_deltaB", "Difference residual selection constant", "SNES", ngmres->deltaB, &ngmres->deltaB, PETSC_NULL);CHKERRQ(ierr); 109 ierr = PetscOptionsTail();CHKERRQ(ierr); 110 if ((ngmres->gammaA > ngmres->gammaC) && (ngmres->gammaC > 2.)) ngmres->gammaC = ngmres->gammaA; 111 PetscFunctionReturn(0); 112 } 113 114 #undef __FUNCT__ 115 #define __FUNCT__ "SNESView_NGMRES" 116 PetscErrorCode SNESView_NGMRES(SNES snes, PetscViewer viewer) 117 { 118 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 119 PetscBool iascii; 120 PetscErrorCode ierr; 121 122 PetscFunctionBegin; 123 ierr = PetscTypeCompare((PetscObject) viewer, PETSCVIEWERASCII, &iascii);CHKERRQ(ierr); 124 if (iascii) { 125 ierr = PetscViewerASCIIPrintf(viewer, " Size of space %d\n", ngmres->msize);CHKERRQ(ierr); 126 ierr = PetscViewerASCIIPrintf(viewer, " Maximum iterations before restart %d\n", ngmres->k_rmax);CHKERRQ(ierr); 127 } 128 PetscFunctionReturn(0); 129 } 130 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "SNESSolve_NGMRES" 134 135 PetscErrorCode SNESSolve_NGMRES(SNES snes) 136 { 137 SNES pc; 138 SNES_NGMRES *ngmres = (SNES_NGMRES *) snes->data; 139 140 141 142 //present solution, residual, and preconditioned residual 143 Vec x, r, f, b, d; 144 Vec x_A, r_A; 145 146 //previous iterations to construct the subspace 147 Vec *rdot = ngmres->rdot; 148 Vec *xdot = ngmres->xdot; 149 150 //coefficients and RHS to the minimization problem 151 PetscScalar *beta = ngmres->beta; 152 PetscScalar *xi = ngmres->xi; 153 PetscReal r_norm, r_A_norm; 154 PetscReal nu; 155 PetscScalar alph_total = 0.; 156 PetscScalar qentry; 157 PetscInt i, j, k, k_restart, l, ivec; 158 159 //solution selection data 160 PetscBool selectA, selectRestart; 161 PetscReal d_norm, d_min_norm, d_cur_norm; 162 PetscReal r_min_norm; 163 164 PetscErrorCode ierr; 165 166 PetscFunctionBegin; 167 //variable initialization 168 snes->reason = SNES_CONVERGED_ITERATING; 169 x = snes->vec_sol; 170 r = snes->vec_func; 171 f = snes->work[0]; 172 b = snes->vec_rhs; 173 x_A = snes->vec_sol_update; 174 r_A = snes->work[1]; 175 d = snes->work[2]; 176 ierr = VecDuplicate(x,&r);CHKERRQ(ierr); 177 178 ierr = SNESGetPC(snes, &pc);CHKERRQ(ierr); 179 ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); 180 snes->iter = 0; 181 snes->norm = 0.; 182 ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); 183 184 //initialization -- 185 186 //r = F(u) 187 ierr = SNESComputeFunction(snes, x, r);CHKERRQ(ierr); /* r = F(x) */ 188 if (snes->domainerror) { 189 snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; 190 PetscFunctionReturn(0); 191 } 192 193 //nu = (r, r); 194 ierr = VecNorm(r, NORM_2, &r_norm);CHKERRQ(ierr); 195 r_min_norm = r_norm; 196 nu = r_norm*r_norm; 197 if (PetscIsInfOrNanReal(r_norm)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_FP, "Infinite or not-a-number generated in norm"); 198 199 //q_{11} = nu 200 Q(0,0) = nu; 201 ngmres->r_norms[0] = r_norm; 202 //rdot[0] = r 203 ierr = VecCopy(x, xdot[0]);CHKERRQ(ierr); 204 ierr = VecCopy(r, rdot[0]);CHKERRQ(ierr); 205 206 ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); 207 snes->norm = r_norm; 208 ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); 209 SNESLogConvHistory(snes, r_norm, 0); 210 ierr = SNESMonitor(snes, 0, r_norm);CHKERRQ(ierr); 211 ierr = (*snes->ops->converged)(snes,0,0.0,0.0,r_norm,&snes->reason,snes->cnvP);CHKERRQ(ierr); 212 if (snes->reason) PetscFunctionReturn(0); 213 214 k_restart = 1; 215 l = 1; 216 for (k=1; k<snes->max_its; k++) { 217 218 // 219 220 //select which vector of the stored subspace will be updated 221 ivec = k_restart % ngmres->msize; //replace the last used part of the subspace 222 223 224 //Computation of x^M 225 ierr = SNESSolve(pc, b, x);CHKERRQ(ierr); 226 //r = F(x) 227 ierr = SNESComputeFunction(snes, x, r);CHKERRQ(ierr); 228 ierr = VecNorm(r, NORM_2, &r_norm);CHKERRQ(ierr); 229 //nu = (r, r) 230 ngmres->r_norms[ivec] = r_norm; 231 nu = r_norm*r_norm; 232 if (r_min_norm > r_norm) r_min_norm = r_norm; //the minimum norm is now of r^M 233 234 //construct the right hand side and xi factors 235 for (i = 0; i < l; i++) { 236 VecDot(rdot[i], r, &xi[i]); 237 beta[i] = nu - xi[i]; 238 } 239 240 //construct h 241 for (j = 0; j < l; j++) { 242 for (i = 0; i < l; i++) { 243 H(i, j) = Q(i, j) - xi[i] - xi[j] + nu; 244 } 245 } 246 #ifdef PETSC_MISSING_LAPACK_GELSS 247 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "NGMRES with LS requires the LAPACK GELSS routine."); 248 #else 249 ngmres->m = PetscBLASIntCast(l); 250 ngmres->n = PetscBLASIntCast(l); 251 ngmres->info = PetscBLASIntCast(0); 252 ngmres->rcond = -1.; 253 ngmres->nrhs = 1; 254 #ifdef PETSC_USE_COMPLEX 255 LAPACKgelss_(&ngmres->m, 256 &ngmres->n, 257 &ngmres->nrhs, 258 ngmres->h, 259 &ngmres->lda, 260 ngmres->beta, 261 &ngmres->ldb, 262 ngmres->s, 263 &ngmres->rcond, 264 &ngmres->rank, 265 ngmres->work, 266 &ngmres->lwork, 267 ngmres->rwork, 268 &ngmres->info); 269 #else 270 LAPACKgelss_(&ngmres->m, 271 &ngmres->n, 272 &ngmres->nrhs, 273 ngmres->h, 274 &ngmres->lda, 275 ngmres->beta, 276 &ngmres->ldb, 277 ngmres->s, 278 &ngmres->rcond, 279 &ngmres->rank, 280 ngmres->work, 281 &ngmres->lwork, 282 &ngmres->info); 283 #endif 284 if (ngmres->info < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to GELSS"); 285 if (ngmres->info > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"SVD failed to converge"); 286 #endif 287 288 alph_total = 0.; 289 for (i = 0; i < l; i++) { 290 alph_total += beta[i]; 291 } 292 ierr = VecCopy(x, x_A);CHKERRQ(ierr); 293 ierr = VecScale(x_A, 1. - alph_total);CHKERRQ(ierr); 294 295 for(i=0;i<l;i++){ 296 ierr= VecAXPY(x_A, beta[i], xdot[i]);CHKERRQ(ierr); 297 } 298 ierr = SNESComputeFunction(snes, x_A, r_A);CHKERRQ(ierr); 299 ierr = VecNorm(r_A, NORM_2, &r_A_norm);CHKERRQ(ierr); 300 301 selectA = PETSC_TRUE; 302 //Conditions for choosing the accelerated answer -- 303 304 //Criterion A -- the norm of the function isn't increased above the minimum by too much 305 if (r_A_norm >= ngmres->gammaA*r_min_norm) { 306 selectA = PETSC_FALSE; 307 } 308 309 //Criterion B -- the choice of x^A isn't too close to some other choice 310 ierr=VecCopy(x_A,d);CHKERRQ(ierr); 311 ierr=VecAXPY(d,-1,x);CHKERRQ(ierr); 312 ierr=VecNorm(d,NORM_2,&d_norm);CHKERRQ(ierr); 313 d_min_norm=10000000; 314 for(i=0;i<l;i++) { 315 ierr=VecCopy(x_A,d);CHKERRQ(ierr); 316 ierr=VecAXPY(d,-1,xdot[i]);CHKERRQ(ierr); 317 ierr=VecNorm(d,NORM_2,&d_cur_norm);CHKERRQ(ierr); 318 if(d_cur_norm<d_min_norm) d_min_norm=d_cur_norm; 319 } 320 if (ngmres->epsilonB*d_norm<d_min_norm || sqrt(r_norm)<ngmres->deltaB*sqrt(r_min_norm)) { 321 } else { 322 selectA=PETSC_FALSE; 323 } 324 325 326 if (selectA) { 327 if (ngmres->debug) 328 PetscPrintf(PETSC_COMM_WORLD, "picked r_A, ||r_A||_2 = %e, ||r_M||_2 = %e\n", r_A_norm, r_norm); 329 //copy it over 330 r_norm = r_A_norm; 331 nu = r_norm*r_norm; 332 ierr = VecCopy(r_A, r);CHKERRQ(ierr); 333 ierr = VecCopy(x_A, x);CHKERRQ(ierr); 334 } else { 335 if(ngmres->debug) 336 PetscPrintf(PETSC_COMM_WORLD, "picked r_M, ||r_A||_2 = %e, ||r_M||_2 = %e\n", r_A_norm, r_norm); 337 } 338 339 selectRestart = PETSC_FALSE; 340 341 //maximum iteration criterion 342 if (k_restart > ngmres->k_rmax) { 343 selectRestart = PETSC_TRUE; 344 } 345 346 //difference stagnation restart 347 if ((ngmres->epsilonB*d_norm > d_min_norm) && 348 (sqrt(r_A_norm) > ngmres->deltaB*sqrt(r_min_norm))) { 349 if (ngmres->debug) 350 PetscPrintf(PETSC_COMM_WORLD, "difference restart: %e > %e\n", ngmres->epsilonB*d_norm, d_min_norm); 351 selectRestart = PETSC_TRUE; 352 } 353 354 // residual stagnation restart 355 if (sqrt(r_A_norm) > ngmres->gammaC*sqrt(r_min_norm)) { 356 if (ngmres->debug) 357 PetscPrintf(PETSC_COMM_WORLD, "residual restart: %e > %e\n", sqrt(r_A_norm), ngmres->gammaC*sqrt(r_min_norm)); 358 selectRestart = PETSC_TRUE; 359 } 360 361 if (selectRestart) { 362 if (ngmres->debug) 363 PetscPrintf(PETSC_COMM_WORLD, "Restarted at iteration %d\n", k_restart); 364 k_restart = 1; 365 l = 1; 366 //q_{11} = nu 367 ngmres->r_norms[0] = r_norm; 368 nu = r_norm*r_norm; 369 Q(0,0) = nu; 370 //rdot[0] = r 371 ierr = VecCopy(x, xdot[0]);CHKERRQ(ierr); 372 ierr = VecCopy(r, rdot[0]);CHKERRQ(ierr); 373 } else { 374 //select the current size of the subspace 375 if (l < ngmres->msize) { 376 l++; 377 } 378 k_restart++; 379 //place the current entry in the list of previous entries 380 ierr = VecCopy(r, rdot[ivec]);CHKERRQ(ierr); 381 ierr = VecCopy(x, xdot[ivec]);CHKERRQ(ierr); 382 ngmres->r_norms[ivec] = r_norm; 383 if (r_min_norm > r_norm) r_min_norm = r_norm; //the minimum norm is now of r^A 384 for (i = 0; i < l; i++) { 385 VecDot(r, rdot[i], &qentry); 386 Q(i, ivec) = qentry; 387 Q(ivec, i) = qentry; 388 } 389 } 390 391 SNESLogConvHistory(snes, r_norm, k); 392 ierr = SNESMonitor(snes, k, r_norm);CHKERRQ(ierr); 393 394 snes->iter =k; 395 ierr = (*snes->ops->converged)(snes,snes->iter,0.0,0.0,r_norm,&snes->reason,snes->cnvP);CHKERRQ(ierr); 396 if (snes->reason) PetscFunctionReturn(0); 397 } 398 snes->reason = SNES_DIVERGED_MAX_IT; 399 PetscFunctionReturn(0); 400 } 401 402 403 404 EXTERN_C_BEGIN 405 #undef __FUNCT__ 406 #define __FUNCT__ "SNESCreate_NGMRES" 407 PetscErrorCode SNESCreate_NGMRES(SNES snes) 408 { 409 SNES_NGMRES *ngmres; 410 PetscErrorCode ierr; 411 412 PetscFunctionBegin; 413 snes->ops->destroy = SNESDestroy_NGMRES; 414 snes->ops->setup = SNESSetUp_NGMRES; 415 snes->ops->setfromoptions = SNESSetFromOptions_NGMRES; 416 snes->ops->view = SNESView_NGMRES; 417 snes->ops->solve = SNESSolve_NGMRES; 418 snes->ops->reset = SNESReset_NGMRES; 419 420 ierr = PetscNewLog(snes, SNES_NGMRES, &ngmres);CHKERRQ(ierr); 421 snes->data = (void*) ngmres; 422 ngmres->msize = 10; 423 ngmres->debug = PETSC_FALSE; 424 425 ngmres->gammaA = 2.; 426 ngmres->gammaC = 2.; 427 ngmres->deltaB = 0.9; 428 ngmres->epsilonB = 0.1; 429 ngmres->k_rmax = 200; 430 431 ierr = SNESGetPC(snes, &snes->pc);CHKERRQ(ierr); 432 PetscFunctionReturn(0); 433 } 434 EXTERN_C_END 435