1 /* 2 Contributed by Patrick Sanan and Sascha M. Schnepp 3 */ 4 5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 6 7 static PetscBool cited = PETSC_FALSE; 8 static const char citation[] = "@article{SSM2016,\n" 9 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n" 10 " title = {Pipelined, Flexible Krylov Subspace Methods},\n" 11 " journal = {SIAM Journal on Scientific Computing},\n" 12 " volume = {38},\n" 13 " number = {5},\n" 14 " pages = {C441-C470},\n" 15 " year = {2016},\n" 16 " doi = {10.1137/15M1049130},\n" 17 " URL = {http://dx.doi.org/10.1137/15M1049130},\n" 18 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n" 19 "}\n"; 20 21 #define PIPEFGMRES_DELTA_DIRECTIONS 10 22 #define PIPEFGMRES_DEFAULT_MAXK 30 23 24 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt); 25 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *); 26 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt); 27 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 28 29 /* 30 31 KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres. 32 33 This is called once, usually automatically by KSPSolve() or KSPSetUp(), 34 but can be called directly by KSPSetUp(). 35 36 */ 37 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) { 38 PetscInt k; 39 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 40 const PetscInt max_k = pipefgmres->max_k; 41 42 PetscFunctionBegin; 43 PetscCall(KSPSetUp_GMRES(ksp)); 44 45 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs)); 46 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs_user_work)); 47 48 PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL)); 49 for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; 50 51 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs)); 52 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs_user_work)); 53 54 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->redux)); 55 56 PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL)); 57 for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; 58 59 PetscFunctionReturn(0); 60 } 61 62 /* 63 64 KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart. Return residual 65 history if requested. 66 67 input parameters: 68 . pipefgmres - structure containing parameters and work areas 69 70 output parameters: 71 . itcount - number of iterations used. If null, ignored. 72 . converged - 0 if not converged 73 74 Notes: 75 On entry, the value in vector VEC_VV(0) should be 76 the initial residual. 77 78 */ 79 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp) { 80 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 81 PetscReal res_norm; 82 PetscReal hapbnd, tt; 83 PetscScalar *hh, *hes, *lhh, shift = pipefgmres->shift; 84 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 85 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 86 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 87 PetscInt i, j, k; 88 Mat Amat, Pmat; 89 Vec Q, W; /* Pipelining vectors */ 90 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 91 92 PetscFunctionBegin; 93 if (itcount) *itcount = 0; 94 95 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 96 Q = VEC_Q; 97 W = VEC_W; 98 99 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 100 /* Note that we add an extra value here to allow for a single reduction */ 101 if (!pipefgmres->orthogwork) PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork)); 102 lhh = pipefgmres->orthogwork; 103 104 /* Number of pseudo iterations since last restart is the number 105 of prestart directions */ 106 loc_it = 0; 107 108 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 109 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 110 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 111 (loc_it -1) is passed, so the two are equivalent */ 112 pipefgmres->it = (loc_it - 1); 113 114 /* initial residual is in VEC_VV(0) - compute its norm*/ 115 PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm)); 116 117 /* first entry in right-hand-side of hessenberg system is just 118 the initial residual norm */ 119 *RS(0) = res_norm; 120 121 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 122 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 123 else ksp->rnorm = 0; 124 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 125 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 126 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); 127 128 /* check for the convergence - maybe the current guess is good enough */ 129 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 130 if (ksp->reason) { 131 if (itcount) *itcount = 0; 132 PetscFunctionReturn(0); 133 } 134 135 /* scale VEC_VV (the initial residual) */ 136 PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm)); 137 138 /* Fill the pipeline */ 139 PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it))); 140 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat)); 141 PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it))); 142 PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */ 143 144 /* MAIN ITERATION LOOP BEGINNING*/ 145 /* keep iterating until we have converged OR generated the max number 146 of directions OR reached the max number of iterations for the method */ 147 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 148 if (loc_it) { 149 PetscCall(KSPLogResidualHistory(ksp, res_norm)); 150 PetscCall(KSPMonitor(ksp, ksp->its, res_norm)); 151 } 152 pipefgmres->it = (loc_it - 1); 153 154 /* see if more space is needed for work vectors */ 155 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 156 PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1)); 157 /* (loc_it+1) is passed in as number of the first vector that should 158 be allocated */ 159 } 160 161 /* Note that these inner products are with "Z" now, so 162 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 163 not the value from the equivalent FGMRES run (even in exact arithmetic) 164 That is, the H we need for the Arnoldi relation is different from the 165 coefficients we use in the orthogonalization process,because of the shift */ 166 167 /* Do some local twiddling to allow for a single reduction */ 168 for (i = 0; i < loc_it + 1; i++) redux[i] = VEC_VV(i); 169 redux[loc_it + 1] = ZVEC(loc_it); 170 171 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 172 PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh)); 173 174 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 175 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)))); 176 177 /* The work to be overlapped with the inner products follows. 178 This is application of the preconditioner and the operator 179 to compute intermediate quantites which will be combined (locally) 180 with the results of the inner products. 181 */ 182 PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q)); 183 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat)); 184 PetscCall(KSP_MatMult(ksp, Amat, Q, W)); 185 186 /* Compute inner products of the new direction with previous directions, 187 and the norm of the to-be-orthogonalized direction "Z". 188 This information is enough to build the required entries 189 of H. The inner product with VEC_VV(it_loc) is 190 *different* than in the standard FGMRES and need to be dealt with specially. 191 That is, for standard FGMRES the orthogonalization coefficients are the same 192 as the coefficients used in the Arnoldi relation to reconstruct, but here this 193 is not true (albeit only for the one entry of H which we "unshift" below. */ 194 195 /* Finish the dot product, retrieving the extra entry */ 196 PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh)); 197 tt = PetscRealPart(lhh[loc_it + 1]); 198 199 /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt 200 Note that the Hessenberg entries require a shift, as these are for the 201 relation AU = VH, which is wrt unshifted basis vectors */ 202 hh = HH(0, loc_it); 203 hes = HES(0, loc_it); 204 for (j = 0; j < loc_it; j++) { 205 hh[j] = lhh[j]; 206 hes[j] = lhh[j]; 207 } 208 hh[loc_it] = lhh[loc_it] + shift; 209 hes[loc_it] = lhh[loc_it] + shift; 210 211 /* we delay applying the shift here */ 212 for (j = 0; j <= loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ } 213 214 /* Compute the norm of the un-normalized new direction using the rearranged formula 215 Note that these are shifted ("barred") quantities */ 216 for (k = 0; k <= loc_it; k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 217 /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */ 218 if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0; 219 if (tt < 0.0) { 220 /* If we detect square root breakdown in the norm, we must restart the algorithm. 221 Here this means we simply break the current loop and reconstruct the solution 222 using the basis we have computed thus far. Note that by breaking immediately, 223 we do not update the iteration count, so computation done in this iteration 224 should be disregarded. 225 */ 226 PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt)); 227 break; 228 } else { 229 tt = PetscSqrtReal(tt); 230 } 231 232 /* new entry in hessenburg is the 2-norm of our new direction */ 233 hh[loc_it + 1] = tt; 234 hes[loc_it + 1] = tt; 235 236 /* The recurred computation for the new direction 237 The division by tt is delayed to the happy breakdown check later 238 Note placement BEFORE the unshift 239 */ 240 PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1))); 241 PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0))); 242 /* (VEC_VV(loc_it+1) is not normalized yet) */ 243 244 /* The recurred computation for the preconditioned vector (u) */ 245 PetscCall(VecCopy(Q, PREVEC(loc_it + 1))); 246 PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0))); 247 PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt)); 248 249 /* Unshift an entry in the GS coefficients ("removing the bar") */ 250 lhh[loc_it] -= shift; 251 252 /* The recurred computation for z (Au) 253 Note placement AFTER the "unshift" */ 254 PetscCall(VecCopy(W, ZVEC(loc_it + 1))); 255 PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0))); 256 PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt)); 257 258 /* Happy Breakdown Check */ 259 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 260 /* RS(loc_it) contains the res_norm from the last iteration */ 261 hapbnd = PetscMin(pipefgmres->haptol, hapbnd); 262 if (tt > hapbnd) { 263 /* scale new direction by its norm */ 264 PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt)); 265 } else { 266 /* This happens when the solution is exactly reached. */ 267 /* So there is no new direction... */ 268 PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */ 269 hapend = PETSC_TRUE; 270 } 271 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 272 current solution would not be exact if HES was singular. Note that 273 HH non-singular implies that HES is not singular, and HES is guaranteed 274 to be nonsingular when PREVECS are linearly independent and A is 275 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 276 of HES). So we should really add a check to verify that HES is nonsingular.*/ 277 278 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 279 here to check that the hessenberg matrix is indeed non-singular (since 280 FGMRES does not guarantee this) */ 281 282 /* Now apply rotations to new col of hessenberg (and right side of system), 283 calculate new rotation, and get new residual norm at the same time*/ 284 PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm)); 285 if (ksp->reason) break; 286 287 loc_it++; 288 pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */ 289 290 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 291 ksp->its++; 292 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 293 else ksp->rnorm = 0; 294 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 295 296 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 297 298 /* Catch error in happy breakdown and signal convergence and break from loop */ 299 if (hapend) { 300 if (!ksp->reason) { 301 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "You reached the happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm); 302 ksp->reason = KSP_DIVERGED_BREAKDOWN; 303 break; 304 } 305 } 306 } 307 /* END OF ITERATION LOOP */ 308 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 309 310 /* 311 Monitor if we know that we will not return for a restart */ 312 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); 313 314 if (itcount) *itcount = loc_it; 315 316 /* 317 Down here we have to solve for the "best" coefficients of the Krylov 318 columns, add the solution values together, and possibly unwind the 319 preconditioning from the solution 320 */ 321 322 /* Form the solution (or the solution so far) */ 323 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln 324 properly navigates */ 325 326 PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1)); 327 328 PetscFunctionReturn(0); 329 } 330 331 /* 332 KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method. 333 334 Input Parameter: 335 . ksp - the Krylov space object that was set to use pipefgmres 336 337 Output Parameter: 338 . outits - number of iterations used 339 340 */ 341 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) { 342 PetscInt its, itcount; 343 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 344 PetscBool guess_zero = ksp->guess_zero; 345 346 PetscFunctionBegin; 347 /* We have not checked these routines for use with complex numbers. The inner products 348 are likely not defined correctly for that case */ 349 PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars"); 350 351 PetscCall(PetscCitationsRegister(citation, &cited)); 352 353 PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 354 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 355 ksp->its = 0; 356 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 357 358 itcount = 0; 359 ksp->reason = KSP_CONVERGED_ITERATING; 360 while (!ksp->reason) { 361 PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs)); 362 PetscCall(KSPPIPEFGMRESCycle(&its, ksp)); 363 itcount += its; 364 if (itcount >= ksp->max_it) { 365 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 366 break; 367 } 368 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 369 } 370 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 371 PetscFunctionReturn(0); 372 } 373 374 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) { 375 PetscFunctionBegin; 376 PetscCall(KSPReset_PIPEFGMRES(ksp)); 377 PetscCall(KSPDestroy_GMRES(ksp)); 378 PetscFunctionReturn(0); 379 } 380 381 /* 382 KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the 383 current iterates. 384 385 Input parameters: 386 nrs - work area of size it + 1. 387 vguess - index of initial guess 388 vdest - index of result. Note that vguess may == vdest (replace 389 guess with the solution). 390 it - HH upper triangular part is a block of size (it+1) x (it+1) 391 392 This is an internal routine that knows about the PIPEFGMRES internals. 393 */ 394 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it) { 395 PetscScalar tt; 396 PetscInt k, j; 397 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 398 399 PetscFunctionBegin; 400 /* Solve for solution vector that minimizes the residual */ 401 402 if (it < 0) { /* no pipefgmres steps have been performed */ 403 PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */ 404 PetscFunctionReturn(0); 405 } 406 407 /* solve the upper triangular system - RS is the right side and HH is 408 the upper triangular matrix - put soln in nrs */ 409 if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it); 410 else nrs[it] = 0.0; 411 412 for (k = it - 1; k >= 0; k--) { 413 tt = *RS(k); 414 for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j]; 415 nrs[k] = tt / *HH(k, k); 416 } 417 418 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 419 PetscCall(VecZeroEntries(VEC_TEMP)); 420 PetscCall(VecMAXPY(VEC_TEMP, it + 1, nrs, &PREVEC(0))); 421 422 /* add solution to previous solution */ 423 if (vdest == vguess) { 424 PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP)); 425 } else { 426 PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess)); 427 } 428 PetscFunctionReturn(0); 429 } 430 431 /* 432 433 KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization. 434 Return new residual. 435 436 input parameters: 437 438 . ksp - Krylov space object 439 . it - plane rotations are applied to the (it+1)th column of the 440 modified hessenberg (i.e. HH(:,it)) 441 . hapend - PETSC_FALSE not happy breakdown ending. 442 443 output parameters: 444 . res - the new residual 445 446 */ 447 /* 448 . it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this 449 */ 450 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res) { 451 PetscScalar *hh, *cc, *ss, *rs; 452 PetscInt j; 453 PetscReal hapbnd; 454 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 455 456 PetscFunctionBegin; 457 hh = HH(0, it); /* pointer to beginning of column to update */ 458 cc = CC(0); /* beginning of cosine rotations */ 459 ss = SS(0); /* beginning of sine rotations */ 460 rs = RS(0); /* right hand side of least squares system */ 461 462 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 463 for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j]; 464 465 /* check for the happy breakdown */ 466 hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol); 467 if (PetscAbsScalar(hh[it + 1]) < hapbnd) { 468 PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it)))); 469 *hapend = PETSC_TRUE; 470 } 471 472 /* Apply all the previously computed plane rotations to the new column 473 of the Hessenberg matrix */ 474 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 475 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 476 477 for (j = 0; j < it; j++) { 478 PetscScalar hhj = hh[j]; 479 hh[j] = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1]; 480 hh[j + 1] = -ss[j] * hhj + cc[j] * hh[j + 1]; 481 } 482 483 /* 484 compute the new plane rotation, and apply it to: 485 1) the right-hand-side of the Hessenberg system (RS) 486 note: it affects RS(it) and RS(it+1) 487 2) the new column of the Hessenberg matrix 488 note: it affects HH(it,it) which is currently pointed to 489 by hh and HH(it+1, it) (*(hh+1)) 490 thus obtaining the updated value of the residual... 491 */ 492 493 /* compute new plane rotation */ 494 495 if (!*hapend) { 496 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1]))); 497 if (delta == 0.0) { 498 ksp->reason = KSP_DIVERGED_NULL; 499 PetscFunctionReturn(0); 500 } 501 502 cc[it] = hh[it] / delta; /* new cosine value */ 503 ss[it] = hh[it + 1] / delta; /* new sine value */ 504 505 hh[it] = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1]; 506 rs[it + 1] = -ss[it] * rs[it]; 507 rs[it] = PetscConj(cc[it]) * rs[it]; 508 *res = PetscAbsScalar(rs[it + 1]); 509 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 510 another rotation matrix (so RH doesn't change). The new residual is 511 always the new sine term times the residual from last time (RS(it)), 512 but now the new sine rotation would be zero...so the residual should 513 be zero...so we will multiply "zero" by the last residual. This might 514 not be exactly what we want to do here -could just return "zero". */ 515 516 *res = 0.0; 517 } 518 PetscFunctionReturn(0); 519 } 520 521 /* 522 KSPBuildSolution_PIPEFGMRES 523 524 Input Parameter: 525 . ksp - the Krylov space object 526 . ptr- 527 528 Output Parameter: 529 . result - the solution 530 531 Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle 532 calls directly. 533 534 */ 535 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result) { 536 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 537 538 PetscFunctionBegin; 539 if (!ptr) { 540 if (!pipefgmres->sol_temp) { PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp)); } 541 ptr = pipefgmres->sol_temp; 542 } 543 if (!pipefgmres->nrs) { 544 /* allocate the work area */ 545 PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs)); 546 } 547 548 PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it)); 549 if (result) *result = ptr; 550 PetscFunctionReturn(0); 551 } 552 553 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject) { 554 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 555 PetscBool flg; 556 PetscScalar shift; 557 558 PetscFunctionBegin; 559 PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject)); 560 PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options"); 561 PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg)); 562 if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift)); 563 PetscOptionsHeadEnd(); 564 PetscFunctionReturn(0); 565 } 566 567 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer) { 568 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 569 PetscBool iascii, isstring; 570 571 PetscFunctionBegin; 572 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 573 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring)); 574 575 if (iascii) { 576 PetscCall(PetscViewerASCIIPrintf(viewer, " restart=%" PetscInt_FMT "\n", pipefgmres->max_k)); 577 PetscCall(PetscViewerASCIIPrintf(viewer, " happy breakdown tolerance %g\n", (double)pipefgmres->haptol)); 578 #if defined(PETSC_USE_COMPLEX) 579 PetscCall(PetscViewerASCIIPrintf(viewer, " shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift))); 580 #else 581 PetscCall(PetscViewerASCIIPrintf(viewer, " shift=%g\n", (double)pipefgmres->shift)); 582 #endif 583 } else if (isstring) { 584 PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k)); 585 #if defined(PETSC_USE_COMPLEX) 586 PetscCall(PetscViewerStringSPrintf(viewer, " shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift))); 587 #else 588 PetscCall(PetscViewerStringSPrintf(viewer, " shift=%g\n", (double)pipefgmres->shift)); 589 #endif 590 } 591 PetscFunctionReturn(0); 592 } 593 594 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) { 595 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 596 PetscInt i; 597 598 PetscFunctionBegin; 599 PetscCall(PetscFree(pipefgmres->prevecs)); 600 PetscCall(PetscFree(pipefgmres->zvecs)); 601 for (i = 0; i < pipefgmres->nwork_alloc; i++) { 602 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i])); 603 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i])); 604 } 605 PetscCall(PetscFree(pipefgmres->prevecs_user_work)); 606 PetscCall(PetscFree(pipefgmres->zvecs_user_work)); 607 PetscCall(PetscFree(pipefgmres->redux)); 608 PetscCall(KSPReset_GMRES(ksp)); 609 PetscFunctionReturn(0); 610 } 611 612 /*MC 613 KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method. 614 615 A flexible, 1-stage pipelined variant of GMRES. 616 617 Options Database Keys: 618 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 619 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 620 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of 621 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift() 622 vectors are allocated as needed) 623 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 624 625 Level: intermediate 626 627 Notes: 628 629 This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter. 630 631 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 632 633 Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES). 634 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 635 See the FAQ on the PETSc website for details. 636 637 Developer Notes: 638 This class is subclassed off of KSPGMRES. 639 640 Reference: 641 P. Sanan, S.M. Schnepp, and D.A. May, 642 "Pipelined, Flexible Krylov Subspace Methods," 643 SIAM Journal on Scientific Computing 2016 38:5, C441-C470, 644 DOI: 10.1137/15M1049130 645 646 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES` 647 `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()` 648 M*/ 649 650 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) { 651 KSP_PIPEFGMRES *pipefgmres; 652 653 PetscFunctionBegin; 654 PetscCall(PetscNew(&pipefgmres)); 655 656 ksp->data = (void *)pipefgmres; 657 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 658 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 659 ksp->ops->solve = KSPSolve_PIPEFGMRES; 660 ksp->ops->reset = KSPReset_PIPEFGMRES; 661 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 662 ksp->ops->view = KSPView_PIPEFGMRES; 663 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 664 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 665 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 666 667 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3)); 668 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1)); 669 670 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES)); 671 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES)); 672 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES)); 673 674 pipefgmres->nextra_vecs = 1; 675 pipefgmres->haptol = 1.0e-30; 676 pipefgmres->q_preallocate = 0; 677 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 678 pipefgmres->orthog = NULL; 679 pipefgmres->nrs = NULL; 680 pipefgmres->sol_temp = NULL; 681 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 682 pipefgmres->Rsvd = NULL; 683 pipefgmres->orthogwork = NULL; 684 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 685 pipefgmres->shift = 1.0; 686 PetscFunctionReturn(0); 687 } 688 689 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it) { 690 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 691 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 692 PetscInt nalloc; /* number to allocate */ 693 PetscInt k; 694 695 PetscFunctionBegin; 696 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 697 in a single chunk */ 698 699 /* Adjust the number to allocate to make sure that we don't exceed the 700 number of available slots (pipefgmres->vecs_allocated)*/ 701 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; 702 if (!nalloc) PetscFunctionReturn(0); 703 704 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 705 706 /* work vectors */ 707 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL)); 708 for (k = 0; k < nalloc; k++) pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k]; 709 /* specify size of chunk allocated */ 710 pipefgmres->mwork_alloc[nwork] = nalloc; 711 712 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 713 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL)); 714 for (k = 0; k < nalloc; k++) pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k]; 715 716 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL)); 717 for (k = 0; k < nalloc; k++) pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k]; 718 719 /* increment the number of work vector chunks */ 720 pipefgmres->nwork_alloc++; 721 PetscFunctionReturn(0); 722 } 723 724 /*@ 725 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver. 726 727 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there). 728 729 Logically Collective on ksp 730 731 Input Parameters: 732 + ksp - the Krylov space context 733 - shift - the shift 734 735 Level: intermediate 736 737 Options Database: 738 . -ksp_pipefgmres_shift <shift> - set the shift parameter 739 740 .seealso: `KSPComputeEigenvalues()` 741 @*/ 742 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift) { 743 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 744 745 PetscFunctionBegin; 746 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1); 747 PetscValidLogicalCollectiveScalar(ksp, shift, 2); 748 pipefgmres->shift = shift; 749 PetscFunctionReturn(0); 750 } 751