1 /* 2 Contributed by Patrick Sanan and Sascha M. Schnepp 3 */ 4 5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 6 7 static PetscBool cited = PETSC_FALSE; 8 static const char citation[] = 9 "@article{SSM2016,\n" 10 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n" 11 " title = {Pipelined, Flexible Krylov Subspace Methods},\n" 12 " journal = {SIAM Journal on Scientific Computing},\n" 13 " volume = {38},\n" 14 " number = {5},\n" 15 " pages = {C441-C470},\n" 16 " year = {2016},\n" 17 " doi = {10.1137/15M1049130},\n" 18 " URL = {http://dx.doi.org/10.1137/15M1049130},\n" 19 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n" 20 "}\n"; 21 22 #define PIPEFGMRES_DELTA_DIRECTIONS 10 23 #define PIPEFGMRES_DEFAULT_MAXK 30 24 25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt); 26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*); 27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt); 28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 29 30 /* 31 32 KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres. 33 34 This is called once, usually automatically by KSPSolve() or KSPSetUp(), 35 but can be called directly by KSPSetUp(). 36 37 */ 38 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) 39 { 40 PetscInt k; 41 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 42 const PetscInt max_k = pipefgmres->max_k; 43 44 PetscFunctionBegin; 45 PetscCall(KSPSetUp_GMRES(ksp)); 46 47 PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs)); 48 PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work)); 49 PetscCall(PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)))); 50 51 PetscCall(KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL)); 52 PetscCall(PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0])); 53 for (k=0; k < pipefgmres->vv_allocated; k++) { 54 pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; 55 } 56 57 PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs)); 58 PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work)); 59 PetscCall(PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)))); 60 61 PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux)); 62 PetscCall(PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)))); 63 64 PetscCall(KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL)); 65 PetscCall(PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0])); 66 for (k=0; k < pipefgmres->vv_allocated; k++) { 67 pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; 68 } 69 70 PetscFunctionReturn(0); 71 } 72 73 /* 74 75 KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart. Return residual 76 history if requested. 77 78 input parameters: 79 . pipefgmres - structure containing parameters and work areas 80 81 output parameters: 82 . itcount - number of iterations used. If null, ignored. 83 . converged - 0 if not converged 84 85 Notes: 86 On entry, the value in vector VEC_VV(0) should be 87 the initial residual. 88 89 */ 90 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp) 91 { 92 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 93 PetscReal res_norm; 94 PetscReal hapbnd,tt; 95 PetscScalar *hh,*hes,*lhh,shift = pipefgmres->shift; 96 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 97 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 98 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 99 PetscInt i,j,k; 100 Mat Amat,Pmat; 101 Vec Q,W; /* Pipelining vectors */ 102 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 103 104 PetscFunctionBegin; 105 if (itcount) *itcount = 0; 106 107 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 108 Q = VEC_Q; 109 W = VEC_W; 110 111 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 112 /* Note that we add an extra value here to allow for a single reduction */ 113 if (!pipefgmres->orthogwork) { PetscCall(PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork)); 114 } 115 lhh = pipefgmres->orthogwork; 116 117 /* Number of pseudo iterations since last restart is the number 118 of prestart directions */ 119 loc_it = 0; 120 121 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 122 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 123 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 124 (loc_it -1) is passed, so the two are equivalent */ 125 pipefgmres->it = (loc_it - 1); 126 127 /* initial residual is in VEC_VV(0) - compute its norm*/ 128 PetscCall(VecNorm(VEC_VV(0),NORM_2,&res_norm)); 129 130 /* first entry in right-hand-side of hessenberg system is just 131 the initial residual norm */ 132 *RS(0) = res_norm; 133 134 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 135 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 136 else ksp->rnorm = 0; 137 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 138 PetscCall(KSPLogResidualHistory(ksp,ksp->rnorm)); 139 PetscCall(KSPMonitor(ksp,ksp->its,ksp->rnorm)); 140 141 /* check for the convergence - maybe the current guess is good enough */ 142 PetscCall((*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP)); 143 if (ksp->reason) { 144 if (itcount) *itcount = 0; 145 PetscFunctionReturn(0); 146 } 147 148 /* scale VEC_VV (the initial residual) */ 149 PetscCall(VecScale(VEC_VV(0),1.0/res_norm)); 150 151 /* Fill the pipeline */ 152 PetscCall(KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it))); 153 PetscCall(PCGetOperators(ksp->pc,&Amat,&Pmat)); 154 PetscCall(KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it))); 155 PetscCall(VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it))); /* Note shift */ 156 157 /* MAIN ITERATION LOOP BEGINNING*/ 158 /* keep iterating until we have converged OR generated the max number 159 of directions OR reached the max number of iterations for the method */ 160 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 161 if (loc_it) { 162 PetscCall(KSPLogResidualHistory(ksp,res_norm)); 163 PetscCall(KSPMonitor(ksp,ksp->its,res_norm)); 164 } 165 pipefgmres->it = (loc_it - 1); 166 167 /* see if more space is needed for work vectors */ 168 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 169 PetscCall(KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1)); 170 /* (loc_it+1) is passed in as number of the first vector that should 171 be allocated */ 172 } 173 174 /* Note that these inner products are with "Z" now, so 175 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 176 not the value from the equivalent FGMRES run (even in exact arithmetic) 177 That is, the H we need for the Arnoldi relation is different from the 178 coefficients we use in the orthogonalization process,because of the shift */ 179 180 /* Do some local twiddling to allow for a single reduction */ 181 for (i=0;i<loc_it+1;i++) { 182 redux[i] = VEC_VV(i); 183 } 184 redux[loc_it+1] = ZVEC(loc_it); 185 186 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 187 PetscCall(VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh)); 188 189 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 190 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)))); 191 192 /* The work to be overlapped with the inner products follows. 193 This is application of the preconditioner and the operator 194 to compute intermediate quantites which will be combined (locally) 195 with the results of the inner products. 196 */ 197 PetscCall(KSP_PCApply(ksp,ZVEC(loc_it),Q)); 198 PetscCall(PCGetOperators(ksp->pc,&Amat,&Pmat)); 199 PetscCall(KSP_MatMult(ksp,Amat,Q,W)); 200 201 /* Compute inner products of the new direction with previous directions, 202 and the norm of the to-be-orthogonalized direction "Z". 203 This information is enough to build the required entries 204 of H. The inner product with VEC_VV(it_loc) is 205 *different* than in the standard FGMRES and need to be dealt with specially. 206 That is, for standard FGMRES the orthogonalization coefficients are the same 207 as the coefficients used in the Arnoldi relation to reconstruct, but here this 208 is not true (albeit only for the one entry of H which we "unshift" below. */ 209 210 /* Finish the dot product, retrieving the extra entry */ 211 PetscCall(VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh)); 212 tt = PetscRealPart(lhh[loc_it+1]); 213 214 /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt 215 Note that the Hessenberg entries require a shift, as these are for the 216 relation AU = VH, which is wrt unshifted basis vectors */ 217 hh = HH(0,loc_it); hes=HES(0,loc_it); 218 for (j=0; j<loc_it; j++) { 219 hh[j] = lhh[j]; 220 hes[j] = lhh[j]; 221 } 222 hh[loc_it] = lhh[loc_it] + shift; 223 hes[loc_it] = lhh[loc_it] + shift; 224 225 /* we delay applying the shift here */ 226 for (j=0; j<=loc_it; j++) { 227 lhh[j] = -lhh[j]; /* flip sign */ 228 } 229 230 /* Compute the norm of the un-normalized new direction using the rearranged formula 231 Note that these are shifted ("barred") quantities */ 232 for (k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 233 /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */ 234 if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0 ; 235 if (tt < 0.0) { 236 /* If we detect square root breakdown in the norm, we must restart the algorithm. 237 Here this means we simply break the current loop and reconstruct the solution 238 using the basis we have computed thus far. Note that by breaking immediately, 239 we do not update the iteration count, so computation done in this iteration 240 should be disregarded. 241 */ 242 PetscCall(PetscInfo(ksp,"Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n",ksp->its,(double)tt)); 243 break; 244 } else { 245 tt = PetscSqrtReal(tt); 246 } 247 248 /* new entry in hessenburg is the 2-norm of our new direction */ 249 hh[loc_it+1] = tt; 250 hes[loc_it+1] = tt; 251 252 /* The recurred computation for the new direction 253 The division by tt is delayed to the happy breakdown check later 254 Note placement BEFORE the unshift 255 */ 256 PetscCall(VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1))); 257 PetscCall(VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0))); 258 /* (VEC_VV(loc_it+1) is not normalized yet) */ 259 260 /* The recurred computation for the preconditioned vector (u) */ 261 PetscCall(VecCopy(Q,PREVEC(loc_it+1))); 262 PetscCall(VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0))); 263 PetscCall(VecScale(PREVEC(loc_it+1),1.0/tt)); 264 265 /* Unshift an entry in the GS coefficients ("removing the bar") */ 266 lhh[loc_it] -= shift; 267 268 /* The recurred computation for z (Au) 269 Note placement AFTER the "unshift" */ 270 PetscCall(VecCopy(W,ZVEC(loc_it+1))); 271 PetscCall(VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0))); 272 PetscCall(VecScale(ZVEC(loc_it+1),1.0/tt)); 273 274 /* Happy Breakdown Check */ 275 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 276 /* RS(loc_it) contains the res_norm from the last iteration */ 277 hapbnd = PetscMin(pipefgmres->haptol,hapbnd); 278 if (tt > hapbnd) { 279 /* scale new direction by its norm */ 280 PetscCall(VecScale(VEC_VV(loc_it+1),1.0/tt)); 281 } else { 282 /* This happens when the solution is exactly reached. */ 283 /* So there is no new direction... */ 284 PetscCall(VecSet(VEC_TEMP,0.0)); /* set VEC_TEMP to 0 */ 285 hapend = PETSC_TRUE; 286 } 287 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 288 current solution would not be exact if HES was singular. Note that 289 HH non-singular implies that HES is not singular, and HES is guaranteed 290 to be nonsingular when PREVECS are linearly independent and A is 291 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 292 of HES). So we should really add a check to verify that HES is nonsingular.*/ 293 294 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 295 here to check that the hessenberg matrix is indeed non-singular (since 296 FGMRES does not guarantee this) */ 297 298 /* Now apply rotations to new col of hessenberg (and right side of system), 299 calculate new rotation, and get new residual norm at the same time*/ 300 PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm)); 301 if (ksp->reason) break; 302 303 loc_it++; 304 pipefgmres->it = (loc_it-1); /* Add this here in case it has converged */ 305 306 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 307 ksp->its++; 308 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 309 else ksp->rnorm = 0; 310 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 311 312 PetscCall((*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP)); 313 314 /* Catch error in happy breakdown and signal convergence and break from loop */ 315 if (hapend) { 316 if (!ksp->reason) { 317 PetscCheck(!ksp->errorifnotconverged,PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm); 318 else { 319 ksp->reason = KSP_DIVERGED_BREAKDOWN; 320 break; 321 } 322 } 323 } 324 } 325 /* END OF ITERATION LOOP */ 326 PetscCall(KSPLogResidualHistory(ksp,ksp->rnorm)); 327 328 /* 329 Monitor if we know that we will not return for a restart */ 330 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { 331 PetscCall(KSPMonitor(ksp,ksp->its,ksp->rnorm)); 332 } 333 334 if (itcount) *itcount = loc_it; 335 336 /* 337 Down here we have to solve for the "best" coefficients of the Krylov 338 columns, add the solution values together, and possibly unwind the 339 preconditioning from the solution 340 */ 341 342 /* Form the solution (or the solution so far) */ 343 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln 344 properly navigates */ 345 346 PetscCall(KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1)); 347 348 PetscFunctionReturn(0); 349 } 350 351 /* 352 KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method. 353 354 Input Parameter: 355 . ksp - the Krylov space object that was set to use pipefgmres 356 357 Output Parameter: 358 . outits - number of iterations used 359 360 */ 361 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) 362 { 363 PetscInt its,itcount; 364 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 365 PetscBool guess_zero = ksp->guess_zero; 366 367 PetscFunctionBegin; 368 /* We have not checked these routines for use with complex numbers. The inner products 369 are likely not defined correctly for that case */ 370 PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX),PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars"); 371 372 PetscCall(PetscCitationsRegister(citation,&cited)); 373 374 PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd,PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 375 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 376 ksp->its = 0; 377 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 378 379 itcount = 0; 380 ksp->reason = KSP_CONVERGED_ITERATING; 381 while (!ksp->reason) { 382 PetscCall(KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs)); 383 PetscCall(KSPPIPEFGMRESCycle(&its,ksp)); 384 itcount += its; 385 if (itcount >= ksp->max_it) { 386 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 387 break; 388 } 389 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 390 } 391 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 392 PetscFunctionReturn(0); 393 } 394 395 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) 396 { 397 PetscFunctionBegin; 398 PetscCall(KSPReset_PIPEFGMRES(ksp)); 399 PetscCall(KSPDestroy_GMRES(ksp)); 400 PetscFunctionReturn(0); 401 } 402 403 /* 404 KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the 405 current iterates. 406 407 Input parameters: 408 nrs - work area of size it + 1. 409 vguess - index of initial guess 410 vdest - index of result. Note that vguess may == vdest (replace 411 guess with the solution). 412 it - HH upper triangular part is a block of size (it+1) x (it+1) 413 414 This is an internal routine that knows about the PIPEFGMRES internals. 415 */ 416 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it) 417 { 418 PetscScalar tt; 419 PetscInt k,j; 420 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 421 422 PetscFunctionBegin; 423 /* Solve for solution vector that minimizes the residual */ 424 425 if (it < 0) { /* no pipefgmres steps have been performed */ 426 PetscCall(VecCopy(vguess,vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */ 427 PetscFunctionReturn(0); 428 } 429 430 /* solve the upper triangular system - RS is the right side and HH is 431 the upper triangular matrix - put soln in nrs */ 432 if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it); 433 else nrs[it] = 0.0; 434 435 for (k=it-1; k>=0; k--) { 436 tt = *RS(k); 437 for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j]; 438 nrs[k] = tt / *HH(k,k); 439 } 440 441 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 442 PetscCall(VecZeroEntries(VEC_TEMP)); 443 PetscCall(VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0))); 444 445 /* add solution to previous solution */ 446 if (vdest == vguess) { 447 PetscCall(VecAXPY(vdest,1.0,VEC_TEMP)); 448 } else { 449 PetscCall(VecWAXPY(vdest,1.0,VEC_TEMP,vguess)); 450 } 451 PetscFunctionReturn(0); 452 } 453 454 /* 455 456 KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization. 457 Return new residual. 458 459 input parameters: 460 461 . ksp - Krylov space object 462 . it - plane rotations are applied to the (it+1)th column of the 463 modified hessenberg (i.e. HH(:,it)) 464 . hapend - PETSC_FALSE not happy breakdown ending. 465 466 output parameters: 467 . res - the new residual 468 469 */ 470 /* 471 . it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this 472 */ 473 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res) 474 { 475 PetscScalar *hh,*cc,*ss,*rs; 476 PetscInt j; 477 PetscReal hapbnd; 478 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 479 480 PetscFunctionBegin; 481 hh = HH(0,it); /* pointer to beginning of column to update */ 482 cc = CC(0); /* beginning of cosine rotations */ 483 ss = SS(0); /* beginning of sine rotations */ 484 rs = RS(0); /* right hand side of least squares system */ 485 486 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 487 for (j=0; j<=it+1; j++) *HES(j,it) = hh[j]; 488 489 /* check for the happy breakdown */ 490 hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol); 491 if (PetscAbsScalar(hh[it+1]) < hapbnd) { 492 PetscCall(PetscInfo(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)))); 493 *hapend = PETSC_TRUE; 494 } 495 496 /* Apply all the previously computed plane rotations to the new column 497 of the Hessenberg matrix */ 498 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 499 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 500 501 for (j=0; j<it; j++) { 502 PetscScalar hhj = hh[j]; 503 hh[j] = PetscConj(cc[j])*hhj + ss[j]*hh[j+1]; 504 hh[j+1] = -ss[j] *hhj + cc[j]*hh[j+1]; 505 } 506 507 /* 508 compute the new plane rotation, and apply it to: 509 1) the right-hand-side of the Hessenberg system (RS) 510 note: it affects RS(it) and RS(it+1) 511 2) the new column of the Hessenberg matrix 512 note: it affects HH(it,it) which is currently pointed to 513 by hh and HH(it+1, it) (*(hh+1)) 514 thus obtaining the updated value of the residual... 515 */ 516 517 /* compute new plane rotation */ 518 519 if (!*hapend) { 520 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1]))); 521 if (delta == 0.0) { 522 ksp->reason = KSP_DIVERGED_NULL; 523 PetscFunctionReturn(0); 524 } 525 526 cc[it] = hh[it] / delta; /* new cosine value */ 527 ss[it] = hh[it+1] / delta; /* new sine value */ 528 529 hh[it] = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1]; 530 rs[it+1] = -ss[it]*rs[it]; 531 rs[it] = PetscConj(cc[it])*rs[it]; 532 *res = PetscAbsScalar(rs[it+1]); 533 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 534 another rotation matrix (so RH doesn't change). The new residual is 535 always the new sine term times the residual from last time (RS(it)), 536 but now the new sine rotation would be zero...so the residual should 537 be zero...so we will multiply "zero" by the last residual. This might 538 not be exactly what we want to do here -could just return "zero". */ 539 540 *res = 0.0; 541 } 542 PetscFunctionReturn(0); 543 } 544 545 /* 546 KSPBuildSolution_PIPEFGMRES 547 548 Input Parameter: 549 . ksp - the Krylov space object 550 . ptr- 551 552 Output Parameter: 553 . result - the solution 554 555 Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle 556 calls directly. 557 558 */ 559 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result) 560 { 561 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 562 563 PetscFunctionBegin; 564 if (!ptr) { 565 if (!pipefgmres->sol_temp) { 566 PetscCall(VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp)); 567 PetscCall(PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp)); 568 } 569 ptr = pipefgmres->sol_temp; 570 } 571 if (!pipefgmres->nrs) { 572 /* allocate the work area */ 573 PetscCall(PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs)); 574 PetscCall(PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar))); 575 } 576 577 PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it)); 578 if (result) *result = ptr; 579 PetscFunctionReturn(0); 580 } 581 582 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp) 583 { 584 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 585 PetscBool flg; 586 PetscScalar shift; 587 588 PetscFunctionBegin; 589 PetscCall(KSPSetFromOptions_GMRES(PetscOptionsObject,ksp)); 590 PetscOptionsHeadBegin(PetscOptionsObject,"KSP pipelined FGMRES Options"); 591 PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg)); 592 if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp,shift)); 593 PetscOptionsHeadEnd(); 594 PetscFunctionReturn(0); 595 } 596 597 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer) 598 { 599 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 600 PetscBool iascii,isstring; 601 602 PetscFunctionBegin; 603 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 604 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring)); 605 606 if (iascii) { 607 PetscCall(PetscViewerASCIIPrintf(viewer," restart=%" PetscInt_FMT "\n",pipefgmres->max_k)); 608 PetscCall(PetscViewerASCIIPrintf(viewer," happy breakdown tolerance %g\n",(double)pipefgmres->haptol)); 609 #if defined(PETSC_USE_COMPLEX) 610 PetscCall(PetscViewerASCIIPrintf(viewer," shift=%g+%gi\n",(double)PetscRealPart(pipefgmres->shift),(double)PetscImaginaryPart(pipefgmres->shift))); 611 #else 612 PetscCall(PetscViewerASCIIPrintf(viewer," shift=%g\n",(double)pipefgmres->shift)); 613 #endif 614 } else if (isstring) { 615 PetscCall(PetscViewerStringSPrintf(viewer,"restart %" PetscInt_FMT,pipefgmres->max_k)); 616 #if defined(PETSC_USE_COMPLEX) 617 PetscCall(PetscViewerStringSPrintf(viewer," shift=%g+%gi\n",(double)PetscRealPart(pipefgmres->shift),(double)PetscImaginaryPart(pipefgmres->shift))); 618 #else 619 PetscCall(PetscViewerStringSPrintf(viewer," shift=%g\n",(double)pipefgmres->shift)); 620 #endif 621 } 622 PetscFunctionReturn(0); 623 } 624 625 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) 626 { 627 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 628 PetscInt i; 629 630 PetscFunctionBegin; 631 PetscCall(PetscFree(pipefgmres->prevecs)); 632 PetscCall(PetscFree(pipefgmres->zvecs)); 633 for (i=0; i<pipefgmres->nwork_alloc; i++) { 634 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i])); 635 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i])); 636 } 637 PetscCall(PetscFree(pipefgmres->prevecs_user_work)); 638 PetscCall(PetscFree(pipefgmres->zvecs_user_work)); 639 PetscCall(PetscFree(pipefgmres->redux)); 640 PetscCall(KSPReset_GMRES(ksp)); 641 PetscFunctionReturn(0); 642 } 643 644 /*MC 645 KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method. 646 647 A flexible, 1-stage pipelined variant of GMRES. 648 649 Options Database Keys: 650 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 651 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 652 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of 653 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift() 654 vectors are allocated as needed) 655 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 656 657 Level: intermediate 658 659 Notes: 660 661 This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter. 662 663 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 664 665 Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES). 666 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 667 See the FAQ on the PETSc website for details. 668 669 Developer Notes: 670 This class is subclassed off of KSPGMRES. 671 672 Reference: 673 P. Sanan, S.M. Schnepp, and D.A. May, 674 "Pipelined, Flexible Krylov Subspace Methods," 675 SIAM Journal on Scientific Computing 2016 38:5, C441-C470, 676 DOI: 10.1137/15M1049130 677 678 .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES 679 KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEFGMRESSetShift() 680 M*/ 681 682 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) 683 { 684 KSP_PIPEFGMRES *pipefgmres; 685 686 PetscFunctionBegin; 687 PetscCall(PetscNewLog(ksp,&pipefgmres)); 688 689 ksp->data = (void*)pipefgmres; 690 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 691 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 692 ksp->ops->solve = KSPSolve_PIPEFGMRES; 693 ksp->ops->reset = KSPReset_PIPEFGMRES; 694 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 695 ksp->ops->view = KSPView_PIPEFGMRES; 696 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 697 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 698 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 699 700 PetscCall(KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3)); 701 PetscCall(KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1)); 702 703 PetscCall(PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES)); 704 PetscCall(PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES)); 705 PetscCall(PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES)); 706 707 pipefgmres->nextra_vecs = 1; 708 pipefgmres->haptol = 1.0e-30; 709 pipefgmres->q_preallocate = 0; 710 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 711 pipefgmres->orthog = NULL; 712 pipefgmres->nrs = NULL; 713 pipefgmres->sol_temp = NULL; 714 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 715 pipefgmres->Rsvd = NULL; 716 pipefgmres->orthogwork = NULL; 717 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 718 pipefgmres->shift = 1.0; 719 PetscFunctionReturn(0); 720 } 721 722 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it) 723 { 724 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 725 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 726 PetscInt nalloc; /* number to allocate */ 727 PetscInt k; 728 729 PetscFunctionBegin; 730 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 731 in a single chunk */ 732 733 /* Adjust the number to allocate to make sure that we don't exceed the 734 number of available slots (pipefgmres->vecs_allocated)*/ 735 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) { 736 nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; 737 } 738 if (!nalloc) PetscFunctionReturn(0); 739 740 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 741 742 /* work vectors */ 743 PetscCall(KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL)); 744 PetscCall(PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork])); 745 for (k=0; k < nalloc; k++) { 746 pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k]; 747 } 748 /* specify size of chunk allocated */ 749 pipefgmres->mwork_alloc[nwork] = nalloc; 750 751 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 752 PetscCall(KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL)); 753 PetscCall(PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork])); 754 for (k=0; k < nalloc; k++) { 755 pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k]; 756 } 757 758 PetscCall(KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL)); 759 PetscCall(PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork])); 760 for (k=0; k < nalloc; k++) { 761 pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k]; 762 } 763 764 /* increment the number of work vector chunks */ 765 pipefgmres->nwork_alloc++; 766 PetscFunctionReturn(0); 767 } 768 769 /*@ 770 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver. 771 772 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there). 773 774 Logically Collective on ksp 775 776 Input Parameters: 777 + ksp - the Krylov space context 778 - shift - the shift 779 780 Level: intermediate 781 782 Options Database: 783 . -ksp_pipefgmres_shift <shift> - set the shift parameter 784 785 .seealso: KSPComputeEigenvalues() 786 @*/ 787 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift) 788 { 789 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 790 791 PetscFunctionBegin; 792 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1); 793 PetscValidLogicalCollectiveScalar(ksp,shift,2); 794 pipefgmres->shift = shift; 795 PetscFunctionReturn(0); 796 } 797