1 /* 2 Contributed by Patrick Sanan and Sascha M. Schnepp 3 */ 4 5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 6 7 static PetscBool cited = PETSC_FALSE; 8 static const char citation[] = 9 "@article{SSM2016,\n" 10 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n" 11 " title = {Pipelined, Flexible Krylov Subspace Methods},\n" 12 " journal = {SIAM Journal on Scientific Computing},\n" 13 " volume = {38},\n" 14 " number = {5},\n" 15 " pages = {C441-C470},\n" 16 " year = {2016},\n" 17 " doi = {10.1137/15M1049130},\n" 18 " URL = {http://dx.doi.org/10.1137/15M1049130},\n" 19 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n" 20 "}\n"; 21 22 #define PIPEFGMRES_DELTA_DIRECTIONS 10 23 #define PIPEFGMRES_DEFAULT_MAXK 30 24 25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt); 26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*); 27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt); 28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 29 30 /* 31 32 KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres. 33 34 This is called once, usually automatically by KSPSolve() or KSPSetUp(), 35 but can be called directly by KSPSetUp(). 36 37 */ 38 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) 39 { 40 PetscInt k; 41 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 42 const PetscInt max_k = pipefgmres->max_k; 43 44 PetscFunctionBegin; 45 PetscCall(KSPSetUp_GMRES(ksp)); 46 47 PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs)); 48 PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work)); 49 PetscCall(PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)))); 50 51 PetscCall(KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL)); 52 PetscCall(PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0])); 53 for (k=0; k < pipefgmres->vv_allocated; k++) { 54 pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; 55 } 56 57 PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs)); 58 PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work)); 59 PetscCall(PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)))); 60 61 PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux)); 62 PetscCall(PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)))); 63 64 PetscCall(KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL)); 65 PetscCall(PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0])); 66 for (k=0; k < pipefgmres->vv_allocated; k++) { 67 pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; 68 } 69 70 PetscFunctionReturn(0); 71 } 72 73 /* 74 75 KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart. Return residual 76 history if requested. 77 78 input parameters: 79 . pipefgmres - structure containing parameters and work areas 80 81 output parameters: 82 . itcount - number of iterations used. If null, ignored. 83 . converged - 0 if not converged 84 85 Notes: 86 On entry, the value in vector VEC_VV(0) should be 87 the initial residual. 88 89 */ 90 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp) 91 { 92 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 93 PetscReal res_norm; 94 PetscReal hapbnd,tt; 95 PetscScalar *hh,*hes,*lhh,shift = pipefgmres->shift; 96 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 97 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 98 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 99 PetscInt i,j,k; 100 Mat Amat,Pmat; 101 Vec Q,W; /* Pipelining vectors */ 102 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 103 104 PetscFunctionBegin; 105 if (itcount) *itcount = 0; 106 107 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 108 Q = VEC_Q; 109 W = VEC_W; 110 111 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 112 /* Note that we add an extra value here to allow for a single reduction */ 113 if (!pipefgmres->orthogwork) { PetscCall(PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork)); 114 } 115 lhh = pipefgmres->orthogwork; 116 117 /* Number of pseudo iterations since last restart is the number 118 of prestart directions */ 119 loc_it = 0; 120 121 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 122 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 123 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 124 (loc_it -1) is passed, so the two are equivalent */ 125 pipefgmres->it = (loc_it - 1); 126 127 /* initial residual is in VEC_VV(0) - compute its norm*/ 128 PetscCall(VecNorm(VEC_VV(0),NORM_2,&res_norm)); 129 130 /* first entry in right-hand-side of hessenberg system is just 131 the initial residual norm */ 132 *RS(0) = res_norm; 133 134 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 135 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 136 else ksp->rnorm = 0; 137 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 138 PetscCall(KSPLogResidualHistory(ksp,ksp->rnorm)); 139 PetscCall(KSPMonitor(ksp,ksp->its,ksp->rnorm)); 140 141 /* check for the convergence - maybe the current guess is good enough */ 142 PetscCall((*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP)); 143 if (ksp->reason) { 144 if (itcount) *itcount = 0; 145 PetscFunctionReturn(0); 146 } 147 148 /* scale VEC_VV (the initial residual) */ 149 PetscCall(VecScale(VEC_VV(0),1.0/res_norm)); 150 151 /* Fill the pipeline */ 152 PetscCall(KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it))); 153 PetscCall(PCGetOperators(ksp->pc,&Amat,&Pmat)); 154 PetscCall(KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it))); 155 PetscCall(VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it))); /* Note shift */ 156 157 /* MAIN ITERATION LOOP BEGINNING*/ 158 /* keep iterating until we have converged OR generated the max number 159 of directions OR reached the max number of iterations for the method */ 160 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 161 if (loc_it) { 162 PetscCall(KSPLogResidualHistory(ksp,res_norm)); 163 PetscCall(KSPMonitor(ksp,ksp->its,res_norm)); 164 } 165 pipefgmres->it = (loc_it - 1); 166 167 /* see if more space is needed for work vectors */ 168 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 169 PetscCall(KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1)); 170 /* (loc_it+1) is passed in as number of the first vector that should 171 be allocated */ 172 } 173 174 /* Note that these inner products are with "Z" now, so 175 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 176 not the value from the equivalent FGMRES run (even in exact arithmetic) 177 That is, the H we need for the Arnoldi relation is different from the 178 coefficients we use in the orthogonalization process,because of the shift */ 179 180 /* Do some local twiddling to allow for a single reduction */ 181 for (i=0;i<loc_it+1;i++) { 182 redux[i] = VEC_VV(i); 183 } 184 redux[loc_it+1] = ZVEC(loc_it); 185 186 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 187 PetscCall(VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh)); 188 189 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 190 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)))); 191 192 /* The work to be overlapped with the inner products follows. 193 This is application of the preconditioner and the operator 194 to compute intermediate quantites which will be combined (locally) 195 with the results of the inner products. 196 */ 197 PetscCall(KSP_PCApply(ksp,ZVEC(loc_it),Q)); 198 PetscCall(PCGetOperators(ksp->pc,&Amat,&Pmat)); 199 PetscCall(KSP_MatMult(ksp,Amat,Q,W)); 200 201 /* Compute inner products of the new direction with previous directions, 202 and the norm of the to-be-orthogonalized direction "Z". 203 This information is enough to build the required entries 204 of H. The inner product with VEC_VV(it_loc) is 205 *different* than in the standard FGMRES and need to be dealt with specially. 206 That is, for standard FGMRES the orthogonalization coefficients are the same 207 as the coefficients used in the Arnoldi relation to reconstruct, but here this 208 is not true (albeit only for the one entry of H which we "unshift" below. */ 209 210 /* Finish the dot product, retrieving the extra entry */ 211 PetscCall(VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh)); 212 tt = PetscRealPart(lhh[loc_it+1]); 213 214 /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt 215 Note that the Hessenberg entries require a shift, as these are for the 216 relation AU = VH, which is wrt unshifted basis vectors */ 217 hh = HH(0,loc_it); hes=HES(0,loc_it); 218 for (j=0; j<loc_it; j++) { 219 hh[j] = lhh[j]; 220 hes[j] = lhh[j]; 221 } 222 hh[loc_it] = lhh[loc_it] + shift; 223 hes[loc_it] = lhh[loc_it] + shift; 224 225 /* we delay applying the shift here */ 226 for (j=0; j<=loc_it; j++) { 227 lhh[j] = -lhh[j]; /* flip sign */ 228 } 229 230 /* Compute the norm of the un-normalized new direction using the rearranged formula 231 Note that these are shifted ("barred") quantities */ 232 for (k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 233 /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */ 234 if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0 ; 235 if (tt < 0.0) { 236 /* If we detect square root breakdown in the norm, we must restart the algorithm. 237 Here this means we simply break the current loop and reconstruct the solution 238 using the basis we have computed thus far. Note that by breaking immediately, 239 we do not update the iteration count, so computation done in this iteration 240 should be disregarded. 241 */ 242 PetscCall(PetscInfo(ksp,"Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n",ksp->its,(double)tt)); 243 break; 244 } else { 245 tt = PetscSqrtReal(tt); 246 } 247 248 /* new entry in hessenburg is the 2-norm of our new direction */ 249 hh[loc_it+1] = tt; 250 hes[loc_it+1] = tt; 251 252 /* The recurred computation for the new direction 253 The division by tt is delayed to the happy breakdown check later 254 Note placement BEFORE the unshift 255 */ 256 PetscCall(VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1))); 257 PetscCall(VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0))); 258 /* (VEC_VV(loc_it+1) is not normalized yet) */ 259 260 /* The recurred computation for the preconditioned vector (u) */ 261 PetscCall(VecCopy(Q,PREVEC(loc_it+1))); 262 PetscCall(VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0))); 263 PetscCall(VecScale(PREVEC(loc_it+1),1.0/tt)); 264 265 /* Unshift an entry in the GS coefficients ("removing the bar") */ 266 lhh[loc_it] -= shift; 267 268 /* The recurred computation for z (Au) 269 Note placement AFTER the "unshift" */ 270 PetscCall(VecCopy(W,ZVEC(loc_it+1))); 271 PetscCall(VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0))); 272 PetscCall(VecScale(ZVEC(loc_it+1),1.0/tt)); 273 274 /* Happy Breakdown Check */ 275 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 276 /* RS(loc_it) contains the res_norm from the last iteration */ 277 hapbnd = PetscMin(pipefgmres->haptol,hapbnd); 278 if (tt > hapbnd) { 279 /* scale new direction by its norm */ 280 PetscCall(VecScale(VEC_VV(loc_it+1),1.0/tt)); 281 } else { 282 /* This happens when the solution is exactly reached. */ 283 /* So there is no new direction... */ 284 PetscCall(VecSet(VEC_TEMP,0.0)); /* set VEC_TEMP to 0 */ 285 hapend = PETSC_TRUE; 286 } 287 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 288 current solution would not be exact if HES was singular. Note that 289 HH non-singular implies that HES is not singular, and HES is guaranteed 290 to be nonsingular when PREVECS are linearly independent and A is 291 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 292 of HES). So we should really add a check to verify that HES is nonsingular.*/ 293 294 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 295 here to check that the hessenberg matrix is indeed non-singular (since 296 FGMRES does not guarantee this) */ 297 298 /* Now apply rotations to new col of hessenberg (and right side of system), 299 calculate new rotation, and get new residual norm at the same time*/ 300 PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm)); 301 if (ksp->reason) break; 302 303 loc_it++; 304 pipefgmres->it = (loc_it-1); /* Add this here in case it has converged */ 305 306 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 307 ksp->its++; 308 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 309 else ksp->rnorm = 0; 310 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 311 312 PetscCall((*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP)); 313 314 /* Catch error in happy breakdown and signal convergence and break from loop */ 315 if (hapend) { 316 if (!ksp->reason) { 317 PetscCheck(!ksp->errorifnotconverged,PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm); 318 ksp->reason = KSP_DIVERGED_BREAKDOWN; 319 break; 320 } 321 } 322 } 323 /* END OF ITERATION LOOP */ 324 PetscCall(KSPLogResidualHistory(ksp,ksp->rnorm)); 325 326 /* 327 Monitor if we know that we will not return for a restart */ 328 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { 329 PetscCall(KSPMonitor(ksp,ksp->its,ksp->rnorm)); 330 } 331 332 if (itcount) *itcount = loc_it; 333 334 /* 335 Down here we have to solve for the "best" coefficients of the Krylov 336 columns, add the solution values together, and possibly unwind the 337 preconditioning from the solution 338 */ 339 340 /* Form the solution (or the solution so far) */ 341 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln 342 properly navigates */ 343 344 PetscCall(KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1)); 345 346 PetscFunctionReturn(0); 347 } 348 349 /* 350 KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method. 351 352 Input Parameter: 353 . ksp - the Krylov space object that was set to use pipefgmres 354 355 Output Parameter: 356 . outits - number of iterations used 357 358 */ 359 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) 360 { 361 PetscInt its,itcount; 362 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 363 PetscBool guess_zero = ksp->guess_zero; 364 365 PetscFunctionBegin; 366 /* We have not checked these routines for use with complex numbers. The inner products 367 are likely not defined correctly for that case */ 368 PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX),PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars"); 369 370 PetscCall(PetscCitationsRegister(citation,&cited)); 371 372 PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd,PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 373 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 374 ksp->its = 0; 375 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 376 377 itcount = 0; 378 ksp->reason = KSP_CONVERGED_ITERATING; 379 while (!ksp->reason) { 380 PetscCall(KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs)); 381 PetscCall(KSPPIPEFGMRESCycle(&its,ksp)); 382 itcount += its; 383 if (itcount >= ksp->max_it) { 384 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 385 break; 386 } 387 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 388 } 389 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 390 PetscFunctionReturn(0); 391 } 392 393 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) 394 { 395 PetscFunctionBegin; 396 PetscCall(KSPReset_PIPEFGMRES(ksp)); 397 PetscCall(KSPDestroy_GMRES(ksp)); 398 PetscFunctionReturn(0); 399 } 400 401 /* 402 KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the 403 current iterates. 404 405 Input parameters: 406 nrs - work area of size it + 1. 407 vguess - index of initial guess 408 vdest - index of result. Note that vguess may == vdest (replace 409 guess with the solution). 410 it - HH upper triangular part is a block of size (it+1) x (it+1) 411 412 This is an internal routine that knows about the PIPEFGMRES internals. 413 */ 414 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it) 415 { 416 PetscScalar tt; 417 PetscInt k,j; 418 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 419 420 PetscFunctionBegin; 421 /* Solve for solution vector that minimizes the residual */ 422 423 if (it < 0) { /* no pipefgmres steps have been performed */ 424 PetscCall(VecCopy(vguess,vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */ 425 PetscFunctionReturn(0); 426 } 427 428 /* solve the upper triangular system - RS is the right side and HH is 429 the upper triangular matrix - put soln in nrs */ 430 if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it); 431 else nrs[it] = 0.0; 432 433 for (k=it-1; k>=0; k--) { 434 tt = *RS(k); 435 for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j]; 436 nrs[k] = tt / *HH(k,k); 437 } 438 439 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 440 PetscCall(VecZeroEntries(VEC_TEMP)); 441 PetscCall(VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0))); 442 443 /* add solution to previous solution */ 444 if (vdest == vguess) { 445 PetscCall(VecAXPY(vdest,1.0,VEC_TEMP)); 446 } else { 447 PetscCall(VecWAXPY(vdest,1.0,VEC_TEMP,vguess)); 448 } 449 PetscFunctionReturn(0); 450 } 451 452 /* 453 454 KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization. 455 Return new residual. 456 457 input parameters: 458 459 . ksp - Krylov space object 460 . it - plane rotations are applied to the (it+1)th column of the 461 modified hessenberg (i.e. HH(:,it)) 462 . hapend - PETSC_FALSE not happy breakdown ending. 463 464 output parameters: 465 . res - the new residual 466 467 */ 468 /* 469 . it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this 470 */ 471 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res) 472 { 473 PetscScalar *hh,*cc,*ss,*rs; 474 PetscInt j; 475 PetscReal hapbnd; 476 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 477 478 PetscFunctionBegin; 479 hh = HH(0,it); /* pointer to beginning of column to update */ 480 cc = CC(0); /* beginning of cosine rotations */ 481 ss = SS(0); /* beginning of sine rotations */ 482 rs = RS(0); /* right hand side of least squares system */ 483 484 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 485 for (j=0; j<=it+1; j++) *HES(j,it) = hh[j]; 486 487 /* check for the happy breakdown */ 488 hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol); 489 if (PetscAbsScalar(hh[it+1]) < hapbnd) { 490 PetscCall(PetscInfo(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)))); 491 *hapend = PETSC_TRUE; 492 } 493 494 /* Apply all the previously computed plane rotations to the new column 495 of the Hessenberg matrix */ 496 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 497 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 498 499 for (j=0; j<it; j++) { 500 PetscScalar hhj = hh[j]; 501 hh[j] = PetscConj(cc[j])*hhj + ss[j]*hh[j+1]; 502 hh[j+1] = -ss[j] *hhj + cc[j]*hh[j+1]; 503 } 504 505 /* 506 compute the new plane rotation, and apply it to: 507 1) the right-hand-side of the Hessenberg system (RS) 508 note: it affects RS(it) and RS(it+1) 509 2) the new column of the Hessenberg matrix 510 note: it affects HH(it,it) which is currently pointed to 511 by hh and HH(it+1, it) (*(hh+1)) 512 thus obtaining the updated value of the residual... 513 */ 514 515 /* compute new plane rotation */ 516 517 if (!*hapend) { 518 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1]))); 519 if (delta == 0.0) { 520 ksp->reason = KSP_DIVERGED_NULL; 521 PetscFunctionReturn(0); 522 } 523 524 cc[it] = hh[it] / delta; /* new cosine value */ 525 ss[it] = hh[it+1] / delta; /* new sine value */ 526 527 hh[it] = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1]; 528 rs[it+1] = -ss[it]*rs[it]; 529 rs[it] = PetscConj(cc[it])*rs[it]; 530 *res = PetscAbsScalar(rs[it+1]); 531 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 532 another rotation matrix (so RH doesn't change). The new residual is 533 always the new sine term times the residual from last time (RS(it)), 534 but now the new sine rotation would be zero...so the residual should 535 be zero...so we will multiply "zero" by the last residual. This might 536 not be exactly what we want to do here -could just return "zero". */ 537 538 *res = 0.0; 539 } 540 PetscFunctionReturn(0); 541 } 542 543 /* 544 KSPBuildSolution_PIPEFGMRES 545 546 Input Parameter: 547 . ksp - the Krylov space object 548 . ptr- 549 550 Output Parameter: 551 . result - the solution 552 553 Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle 554 calls directly. 555 556 */ 557 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result) 558 { 559 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 560 561 PetscFunctionBegin; 562 if (!ptr) { 563 if (!pipefgmres->sol_temp) { 564 PetscCall(VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp)); 565 PetscCall(PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp)); 566 } 567 ptr = pipefgmres->sol_temp; 568 } 569 if (!pipefgmres->nrs) { 570 /* allocate the work area */ 571 PetscCall(PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs)); 572 PetscCall(PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar))); 573 } 574 575 PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it)); 576 if (result) *result = ptr; 577 PetscFunctionReturn(0); 578 } 579 580 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp) 581 { 582 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 583 PetscBool flg; 584 PetscScalar shift; 585 586 PetscFunctionBegin; 587 PetscCall(KSPSetFromOptions_GMRES(PetscOptionsObject,ksp)); 588 PetscOptionsHeadBegin(PetscOptionsObject,"KSP pipelined FGMRES Options"); 589 PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg)); 590 if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp,shift)); 591 PetscOptionsHeadEnd(); 592 PetscFunctionReturn(0); 593 } 594 595 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer) 596 { 597 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 598 PetscBool iascii,isstring; 599 600 PetscFunctionBegin; 601 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 602 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring)); 603 604 if (iascii) { 605 PetscCall(PetscViewerASCIIPrintf(viewer," restart=%" PetscInt_FMT "\n",pipefgmres->max_k)); 606 PetscCall(PetscViewerASCIIPrintf(viewer," happy breakdown tolerance %g\n",(double)pipefgmres->haptol)); 607 #if defined(PETSC_USE_COMPLEX) 608 PetscCall(PetscViewerASCIIPrintf(viewer," shift=%g+%gi\n",(double)PetscRealPart(pipefgmres->shift),(double)PetscImaginaryPart(pipefgmres->shift))); 609 #else 610 PetscCall(PetscViewerASCIIPrintf(viewer," shift=%g\n",(double)pipefgmres->shift)); 611 #endif 612 } else if (isstring) { 613 PetscCall(PetscViewerStringSPrintf(viewer,"restart %" PetscInt_FMT,pipefgmres->max_k)); 614 #if defined(PETSC_USE_COMPLEX) 615 PetscCall(PetscViewerStringSPrintf(viewer," shift=%g+%gi\n",(double)PetscRealPart(pipefgmres->shift),(double)PetscImaginaryPart(pipefgmres->shift))); 616 #else 617 PetscCall(PetscViewerStringSPrintf(viewer," shift=%g\n",(double)pipefgmres->shift)); 618 #endif 619 } 620 PetscFunctionReturn(0); 621 } 622 623 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) 624 { 625 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 626 PetscInt i; 627 628 PetscFunctionBegin; 629 PetscCall(PetscFree(pipefgmres->prevecs)); 630 PetscCall(PetscFree(pipefgmres->zvecs)); 631 for (i=0; i<pipefgmres->nwork_alloc; i++) { 632 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i])); 633 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i])); 634 } 635 PetscCall(PetscFree(pipefgmres->prevecs_user_work)); 636 PetscCall(PetscFree(pipefgmres->zvecs_user_work)); 637 PetscCall(PetscFree(pipefgmres->redux)); 638 PetscCall(KSPReset_GMRES(ksp)); 639 PetscFunctionReturn(0); 640 } 641 642 /*MC 643 KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method. 644 645 A flexible, 1-stage pipelined variant of GMRES. 646 647 Options Database Keys: 648 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 649 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 650 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of 651 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift() 652 vectors are allocated as needed) 653 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 654 655 Level: intermediate 656 657 Notes: 658 659 This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter. 660 661 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 662 663 Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES). 664 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 665 See the FAQ on the PETSc website for details. 666 667 Developer Notes: 668 This class is subclassed off of KSPGMRES. 669 670 Reference: 671 P. Sanan, S.M. Schnepp, and D.A. May, 672 "Pipelined, Flexible Krylov Subspace Methods," 673 SIAM Journal on Scientific Computing 2016 38:5, C441-C470, 674 DOI: 10.1137/15M1049130 675 676 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES` 677 `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()` 678 M*/ 679 680 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) 681 { 682 KSP_PIPEFGMRES *pipefgmres; 683 684 PetscFunctionBegin; 685 PetscCall(PetscNewLog(ksp,&pipefgmres)); 686 687 ksp->data = (void*)pipefgmres; 688 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 689 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 690 ksp->ops->solve = KSPSolve_PIPEFGMRES; 691 ksp->ops->reset = KSPReset_PIPEFGMRES; 692 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 693 ksp->ops->view = KSPView_PIPEFGMRES; 694 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 695 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 696 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 697 698 PetscCall(KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3)); 699 PetscCall(KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1)); 700 701 PetscCall(PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES)); 702 PetscCall(PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES)); 703 PetscCall(PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES)); 704 705 pipefgmres->nextra_vecs = 1; 706 pipefgmres->haptol = 1.0e-30; 707 pipefgmres->q_preallocate = 0; 708 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 709 pipefgmres->orthog = NULL; 710 pipefgmres->nrs = NULL; 711 pipefgmres->sol_temp = NULL; 712 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 713 pipefgmres->Rsvd = NULL; 714 pipefgmres->orthogwork = NULL; 715 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 716 pipefgmres->shift = 1.0; 717 PetscFunctionReturn(0); 718 } 719 720 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it) 721 { 722 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 723 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 724 PetscInt nalloc; /* number to allocate */ 725 PetscInt k; 726 727 PetscFunctionBegin; 728 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 729 in a single chunk */ 730 731 /* Adjust the number to allocate to make sure that we don't exceed the 732 number of available slots (pipefgmres->vecs_allocated)*/ 733 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) { 734 nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; 735 } 736 if (!nalloc) PetscFunctionReturn(0); 737 738 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 739 740 /* work vectors */ 741 PetscCall(KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL)); 742 PetscCall(PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork])); 743 for (k=0; k < nalloc; k++) { 744 pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k]; 745 } 746 /* specify size of chunk allocated */ 747 pipefgmres->mwork_alloc[nwork] = nalloc; 748 749 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 750 PetscCall(KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL)); 751 PetscCall(PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork])); 752 for (k=0; k < nalloc; k++) { 753 pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k]; 754 } 755 756 PetscCall(KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL)); 757 PetscCall(PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork])); 758 for (k=0; k < nalloc; k++) { 759 pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k]; 760 } 761 762 /* increment the number of work vector chunks */ 763 pipefgmres->nwork_alloc++; 764 PetscFunctionReturn(0); 765 } 766 767 /*@ 768 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver. 769 770 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there). 771 772 Logically Collective on ksp 773 774 Input Parameters: 775 + ksp - the Krylov space context 776 - shift - the shift 777 778 Level: intermediate 779 780 Options Database: 781 . -ksp_pipefgmres_shift <shift> - set the shift parameter 782 783 .seealso: `KSPComputeEigenvalues()` 784 @*/ 785 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift) 786 { 787 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 788 789 PetscFunctionBegin; 790 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1); 791 PetscValidLogicalCollectiveScalar(ksp,shift,2); 792 pipefgmres->shift = shift; 793 PetscFunctionReturn(0); 794 } 795