1 2 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 3 4 static PetscBool cited = PETSC_FALSE; 5 static const char citation[] = "@article{SSM2016,\n" 6 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n" 7 " title = {Pipelined, Flexible Krylov Subspace Methods},\n" 8 " journal = {SIAM Journal on Scientific Computing},\n" 9 " volume = {38},\n" 10 " number = {5},\n" 11 " pages = {C441-C470},\n" 12 " year = {2016},\n" 13 " doi = {10.1137/15M1049130},\n" 14 " URL = {http://dx.doi.org/10.1137/15M1049130},\n" 15 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n" 16 "}\n"; 17 18 #define PIPEFGMRES_DELTA_DIRECTIONS 10 19 #define PIPEFGMRES_DEFAULT_MAXK 30 20 21 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt); 22 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *); 23 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt); 24 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 25 26 /* 27 28 KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres. 29 30 This is called once, usually automatically by KSPSolve() or KSPSetUp(), 31 but can be called directly by KSPSetUp(). 32 33 */ 34 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) 35 { 36 PetscInt k; 37 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 38 const PetscInt max_k = pipefgmres->max_k; 39 40 PetscFunctionBegin; 41 PetscCall(KSPSetUp_GMRES(ksp)); 42 43 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs)); 44 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs_user_work)); 45 46 PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL)); 47 for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; 48 49 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs)); 50 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs_user_work)); 51 52 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->redux)); 53 54 PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL)); 55 for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; 56 57 PetscFunctionReturn(PETSC_SUCCESS); 58 } 59 60 /* 61 62 KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart. Return residual 63 history if requested. 64 65 input parameters: 66 . pipefgmres - structure containing parameters and work areas 67 68 output parameters: 69 . itcount - number of iterations used. If null, ignored. 70 . converged - 0 if not converged 71 72 Notes: 73 On entry, the value in vector VEC_VV(0) should be 74 the initial residual. 75 76 */ 77 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp) 78 { 79 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 80 PetscReal res_norm; 81 PetscReal hapbnd, tt; 82 PetscScalar *hh, *hes, *lhh, shift = pipefgmres->shift; 83 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 84 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 85 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 86 PetscInt i, j, k; 87 Mat Amat, Pmat; 88 Vec Q, W; /* Pipelining vectors */ 89 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 90 91 PetscFunctionBegin; 92 if (itcount) *itcount = 0; 93 94 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 95 Q = VEC_Q; 96 W = VEC_W; 97 98 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 99 /* Note that we add an extra value here to allow for a single reduction */ 100 if (!pipefgmres->orthogwork) PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork)); 101 lhh = pipefgmres->orthogwork; 102 103 /* Number of pseudo iterations since last restart is the number 104 of prestart directions */ 105 loc_it = 0; 106 107 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 108 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 109 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 110 (loc_it -1) is passed, so the two are equivalent */ 111 pipefgmres->it = (loc_it - 1); 112 113 /* initial residual is in VEC_VV(0) - compute its norm*/ 114 PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm)); 115 116 /* first entry in right-hand-side of hessenberg system is just 117 the initial residual norm */ 118 *RS(0) = res_norm; 119 120 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 121 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 122 else ksp->rnorm = 0; 123 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 124 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 125 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); 126 127 /* check for the convergence - maybe the current guess is good enough */ 128 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 129 if (ksp->reason) { 130 if (itcount) *itcount = 0; 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /* scale VEC_VV (the initial residual) */ 135 PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm)); 136 137 /* Fill the pipeline */ 138 PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it))); 139 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat)); 140 PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it))); 141 PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */ 142 143 /* MAIN ITERATION LOOP BEGINNING*/ 144 /* keep iterating until we have converged OR generated the max number 145 of directions OR reached the max number of iterations for the method */ 146 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 147 if (loc_it) { 148 PetscCall(KSPLogResidualHistory(ksp, res_norm)); 149 PetscCall(KSPMonitor(ksp, ksp->its, res_norm)); 150 } 151 pipefgmres->it = (loc_it - 1); 152 153 /* see if more space is needed for work vectors */ 154 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 155 PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1)); 156 /* (loc_it+1) is passed in as number of the first vector that should 157 be allocated */ 158 } 159 160 /* Note that these inner products are with "Z" now, so 161 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 162 not the value from the equivalent FGMRES run (even in exact arithmetic) 163 That is, the H we need for the Arnoldi relation is different from the 164 coefficients we use in the orthogonalization process,because of the shift */ 165 166 /* Do some local twiddling to allow for a single reduction */ 167 for (i = 0; i < loc_it + 1; i++) redux[i] = VEC_VV(i); 168 redux[loc_it + 1] = ZVEC(loc_it); 169 170 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 171 PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh)); 172 173 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 174 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)))); 175 176 /* The work to be overlapped with the inner products follows. 177 This is application of the preconditioner and the operator 178 to compute intermediate quantites which will be combined (locally) 179 with the results of the inner products. 180 */ 181 PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q)); 182 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat)); 183 PetscCall(KSP_MatMult(ksp, Amat, Q, W)); 184 185 /* Compute inner products of the new direction with previous directions, 186 and the norm of the to-be-orthogonalized direction "Z". 187 This information is enough to build the required entries 188 of H. The inner product with VEC_VV(it_loc) is 189 *different* than in the standard FGMRES and need to be dealt with specially. 190 That is, for standard FGMRES the orthogonalization coefficients are the same 191 as the coefficients used in the Arnoldi relation to reconstruct, but here this 192 is not true (albeit only for the one entry of H which we "unshift" below. */ 193 194 /* Finish the dot product, retrieving the extra entry */ 195 PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh)); 196 tt = PetscRealPart(lhh[loc_it + 1]); 197 198 /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt 199 Note that the Hessenberg entries require a shift, as these are for the 200 relation AU = VH, which is wrt unshifted basis vectors */ 201 hh = HH(0, loc_it); 202 hes = HES(0, loc_it); 203 for (j = 0; j < loc_it; j++) { 204 hh[j] = lhh[j]; 205 hes[j] = lhh[j]; 206 } 207 hh[loc_it] = lhh[loc_it] + shift; 208 hes[loc_it] = lhh[loc_it] + shift; 209 210 /* we delay applying the shift here */ 211 for (j = 0; j <= loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ } 212 213 /* Compute the norm of the un-normalized new direction using the rearranged formula 214 Note that these are shifted ("barred") quantities */ 215 for (k = 0; k <= loc_it; k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 216 /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */ 217 if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0; 218 if (tt < 0.0) { 219 /* If we detect square root breakdown in the norm, we must restart the algorithm. 220 Here this means we simply break the current loop and reconstruct the solution 221 using the basis we have computed thus far. Note that by breaking immediately, 222 we do not update the iteration count, so computation done in this iteration 223 should be disregarded. 224 */ 225 PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt)); 226 break; 227 } else { 228 tt = PetscSqrtReal(tt); 229 } 230 231 /* new entry in hessenburg is the 2-norm of our new direction */ 232 hh[loc_it + 1] = tt; 233 hes[loc_it + 1] = tt; 234 235 /* The recurred computation for the new direction 236 The division by tt is delayed to the happy breakdown check later 237 Note placement BEFORE the unshift 238 */ 239 PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1))); 240 PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0))); 241 /* (VEC_VV(loc_it+1) is not normalized yet) */ 242 243 /* The recurred computation for the preconditioned vector (u) */ 244 PetscCall(VecCopy(Q, PREVEC(loc_it + 1))); 245 PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0))); 246 if (tt) PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt)); 247 248 /* Unshift an entry in the GS coefficients ("removing the bar") */ 249 lhh[loc_it] -= shift; 250 251 /* The recurred computation for z (Au) 252 Note placement AFTER the "unshift" */ 253 PetscCall(VecCopy(W, ZVEC(loc_it + 1))); 254 PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0))); 255 if (tt) PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt)); 256 257 /* Happy Breakdown Check */ 258 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 259 /* RS(loc_it) contains the res_norm from the last iteration */ 260 hapbnd = PetscMin(pipefgmres->haptol, hapbnd); 261 if (tt > hapbnd) { 262 /* scale new direction by its norm */ 263 PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt)); 264 } else { 265 /* This happens when the solution is exactly reached. */ 266 /* So there is no new direction... */ 267 PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */ 268 hapend = PETSC_TRUE; 269 } 270 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 271 current solution would not be exact if HES was singular. Note that 272 HH non-singular implies that HES is not singular, and HES is guaranteed 273 to be nonsingular when PREVECS are linearly independent and A is 274 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 275 of HES). So we should really add a check to verify that HES is nonsingular.*/ 276 277 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 278 here to check that the hessenberg matrix is indeed non-singular (since 279 FGMRES does not guarantee this) */ 280 281 /* Now apply rotations to new col of hessenberg (and right side of system), 282 calculate new rotation, and get new residual norm at the same time*/ 283 PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm)); 284 if (ksp->reason) break; 285 286 loc_it++; 287 pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */ 288 289 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 290 ksp->its++; 291 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 292 else ksp->rnorm = 0; 293 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 294 295 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 296 297 /* Catch error in happy breakdown and signal convergence and break from loop */ 298 if (hapend) { 299 if (!ksp->reason) { 300 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "You reached the happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm); 301 ksp->reason = KSP_DIVERGED_BREAKDOWN; 302 break; 303 } 304 } 305 } 306 /* END OF ITERATION LOOP */ 307 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 308 309 /* 310 Monitor if we know that we will not return for a restart */ 311 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); 312 313 if (itcount) *itcount = loc_it; 314 315 /* 316 Down here we have to solve for the "best" coefficients of the Krylov 317 columns, add the solution values together, and possibly unwind the 318 preconditioning from the solution 319 */ 320 321 /* Form the solution (or the solution so far) */ 322 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln 323 properly navigates */ 324 325 PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1)); 326 327 PetscFunctionReturn(PETSC_SUCCESS); 328 } 329 330 /* 331 KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method. 332 333 Input Parameter: 334 . ksp - the Krylov space object that was set to use pipefgmres 335 336 Output Parameter: 337 . outits - number of iterations used 338 339 */ 340 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) 341 { 342 PetscInt its, itcount; 343 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 344 PetscBool guess_zero = ksp->guess_zero; 345 346 PetscFunctionBegin; 347 /* We have not checked these routines for use with complex numbers. The inner products 348 are likely not defined correctly for that case */ 349 PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars"); 350 351 PetscCall(PetscCitationsRegister(citation, &cited)); 352 353 PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 354 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 355 ksp->its = 0; 356 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 357 358 itcount = 0; 359 ksp->reason = KSP_CONVERGED_ITERATING; 360 while (!ksp->reason) { 361 PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs)); 362 PetscCall(KSPPIPEFGMRESCycle(&its, ksp)); 363 itcount += its; 364 if (itcount >= ksp->max_it) { 365 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 366 break; 367 } 368 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 369 } 370 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 371 PetscFunctionReturn(PETSC_SUCCESS); 372 } 373 374 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) 375 { 376 PetscFunctionBegin; 377 PetscCall(KSPReset_PIPEFGMRES(ksp)); 378 PetscCall(KSPDestroy_GMRES(ksp)); 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 /* 383 KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the 384 current iterates. 385 386 Input parameters: 387 nrs - work area of size it + 1. 388 vguess - index of initial guess 389 vdest - index of result. Note that vguess may == vdest (replace 390 guess with the solution). 391 it - HH upper triangular part is a block of size (it+1) x (it+1) 392 393 This is an internal routine that knows about the PIPEFGMRES internals. 394 */ 395 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it) 396 { 397 PetscScalar tt; 398 PetscInt k, j; 399 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 400 401 PetscFunctionBegin; 402 /* Solve for solution vector that minimizes the residual */ 403 404 if (it < 0) { /* no pipefgmres steps have been performed */ 405 PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */ 406 PetscFunctionReturn(PETSC_SUCCESS); 407 } 408 409 /* solve the upper triangular system - RS is the right side and HH is 410 the upper triangular matrix - put soln in nrs */ 411 if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it); 412 else nrs[it] = 0.0; 413 414 for (k = it - 1; k >= 0; k--) { 415 tt = *RS(k); 416 for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j]; 417 nrs[k] = tt / *HH(k, k); 418 } 419 420 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 421 PetscCall(VecZeroEntries(VEC_TEMP)); 422 PetscCall(VecMAXPY(VEC_TEMP, it + 1, nrs, &PREVEC(0))); 423 424 /* add solution to previous solution */ 425 if (vdest == vguess) { 426 PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP)); 427 } else { 428 PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess)); 429 } 430 PetscFunctionReturn(PETSC_SUCCESS); 431 } 432 433 /* 434 435 KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization. 436 Return new residual. 437 438 input parameters: 439 440 . ksp - Krylov space object 441 . it - plane rotations are applied to the (it+1)th column of the 442 modified hessenberg (i.e. HH(:,it)) 443 . hapend - PETSC_FALSE not happy breakdown ending. 444 445 output parameters: 446 . res - the new residual 447 448 */ 449 /* 450 . it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this 451 */ 452 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res) 453 { 454 PetscScalar *hh, *cc, *ss, *rs; 455 PetscInt j; 456 PetscReal hapbnd; 457 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 458 459 PetscFunctionBegin; 460 hh = HH(0, it); /* pointer to beginning of column to update */ 461 cc = CC(0); /* beginning of cosine rotations */ 462 ss = SS(0); /* beginning of sine rotations */ 463 rs = RS(0); /* right hand side of least squares system */ 464 465 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 466 for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j]; 467 468 /* check for the happy breakdown */ 469 hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol); 470 if (PetscAbsScalar(hh[it + 1]) < hapbnd) { 471 PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it)))); 472 *hapend = PETSC_TRUE; 473 } 474 475 /* Apply all the previously computed plane rotations to the new column 476 of the Hessenberg matrix */ 477 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 478 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 479 480 for (j = 0; j < it; j++) { 481 PetscScalar hhj = hh[j]; 482 hh[j] = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1]; 483 hh[j + 1] = -ss[j] * hhj + cc[j] * hh[j + 1]; 484 } 485 486 /* 487 compute the new plane rotation, and apply it to: 488 1) the right-hand-side of the Hessenberg system (RS) 489 note: it affects RS(it) and RS(it+1) 490 2) the new column of the Hessenberg matrix 491 note: it affects HH(it,it) which is currently pointed to 492 by hh and HH(it+1, it) (*(hh+1)) 493 thus obtaining the updated value of the residual... 494 */ 495 496 /* compute new plane rotation */ 497 498 if (!*hapend) { 499 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1]))); 500 if (delta == 0.0) { 501 ksp->reason = KSP_DIVERGED_NULL; 502 PetscFunctionReturn(PETSC_SUCCESS); 503 } 504 505 cc[it] = hh[it] / delta; /* new cosine value */ 506 ss[it] = hh[it + 1] / delta; /* new sine value */ 507 508 hh[it] = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1]; 509 rs[it + 1] = -ss[it] * rs[it]; 510 rs[it] = PetscConj(cc[it]) * rs[it]; 511 *res = PetscAbsScalar(rs[it + 1]); 512 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 513 another rotation matrix (so RH doesn't change). The new residual is 514 always the new sine term times the residual from last time (RS(it)), 515 but now the new sine rotation would be zero...so the residual should 516 be zero...so we will multiply "zero" by the last residual. This might 517 not be exactly what we want to do here -could just return "zero". */ 518 519 *res = 0.0; 520 } 521 PetscFunctionReturn(PETSC_SUCCESS); 522 } 523 524 /* 525 KSPBuildSolution_PIPEFGMRES 526 527 Input Parameter: 528 . ksp - the Krylov space object 529 . ptr- 530 531 Output Parameter: 532 . result - the solution 533 534 Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle 535 calls directly. 536 537 */ 538 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result) 539 { 540 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 541 542 PetscFunctionBegin; 543 if (!ptr) { 544 if (!pipefgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp)); 545 ptr = pipefgmres->sol_temp; 546 } 547 if (!pipefgmres->nrs) { 548 /* allocate the work area */ 549 PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs)); 550 } 551 552 PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it)); 553 if (result) *result = ptr; 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject) 558 { 559 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 560 PetscBool flg; 561 PetscScalar shift; 562 563 PetscFunctionBegin; 564 PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject)); 565 PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options"); 566 PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg)); 567 if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift)); 568 PetscOptionsHeadEnd(); 569 PetscFunctionReturn(PETSC_SUCCESS); 570 } 571 572 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer) 573 { 574 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 575 PetscBool iascii, isstring; 576 577 PetscFunctionBegin; 578 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 579 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring)); 580 581 if (iascii) { 582 PetscCall(PetscViewerASCIIPrintf(viewer, " restart=%" PetscInt_FMT "\n", pipefgmres->max_k)); 583 PetscCall(PetscViewerASCIIPrintf(viewer, " happy breakdown tolerance %g\n", (double)pipefgmres->haptol)); 584 #if defined(PETSC_USE_COMPLEX) 585 PetscCall(PetscViewerASCIIPrintf(viewer, " shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift))); 586 #else 587 PetscCall(PetscViewerASCIIPrintf(viewer, " shift=%g\n", (double)pipefgmres->shift)); 588 #endif 589 } else if (isstring) { 590 PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k)); 591 #if defined(PETSC_USE_COMPLEX) 592 PetscCall(PetscViewerStringSPrintf(viewer, " shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift))); 593 #else 594 PetscCall(PetscViewerStringSPrintf(viewer, " shift=%g\n", (double)pipefgmres->shift)); 595 #endif 596 } 597 PetscFunctionReturn(PETSC_SUCCESS); 598 } 599 600 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) 601 { 602 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 603 PetscInt i; 604 605 PetscFunctionBegin; 606 PetscCall(PetscFree(pipefgmres->prevecs)); 607 PetscCall(PetscFree(pipefgmres->zvecs)); 608 for (i = 0; i < pipefgmres->nwork_alloc; i++) { 609 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i])); 610 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i])); 611 } 612 PetscCall(PetscFree(pipefgmres->prevecs_user_work)); 613 PetscCall(PetscFree(pipefgmres->zvecs_user_work)); 614 PetscCall(PetscFree(pipefgmres->redux)); 615 PetscCall(KSPReset_GMRES(ksp)); 616 PetscFunctionReturn(PETSC_SUCCESS); 617 } 618 619 /*MC 620 KSPPIPEFGMRES - Implements the Pipelined (1-stage) Flexible Generalized Minimal Residual method. [](sec_pipelineksp). [](sec_flexibleksp) 621 622 Options Database Keys: 623 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 624 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 625 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of 626 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift() 627 vectors are allocated as needed) 628 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 629 630 Level: intermediate 631 632 Notes: 633 This variant is not "explicitly normalized" like `KSPPGMRES`, and requires a shift parameter. 634 635 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 636 637 Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with `KSPFGMRES`). 638 639 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 640 See [](doc_faq_pipelined) 641 642 Developer Note: 643 This class is subclassed off of `KSPGMRES`. 644 645 Contributed by: 646 P. Sanan and S.M. Schnepp 647 648 Reference: 649 P. Sanan, S.M. Schnepp, and D.A. May, 650 "Pipelined, Flexible Krylov Subspace Methods," 651 SIAM Journal on Scientific Computing 2016 38:5, C441-C470, 652 DOI: 10.1137/15M1049130 653 654 .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), [](sec_flexibleksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES` 655 `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()` 656 M*/ 657 658 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) 659 { 660 KSP_PIPEFGMRES *pipefgmres; 661 662 PetscFunctionBegin; 663 PetscCall(PetscNew(&pipefgmres)); 664 665 ksp->data = (void *)pipefgmres; 666 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 667 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 668 ksp->ops->solve = KSPSolve_PIPEFGMRES; 669 ksp->ops->reset = KSPReset_PIPEFGMRES; 670 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 671 ksp->ops->view = KSPView_PIPEFGMRES; 672 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 673 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 674 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 675 676 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3)); 677 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1)); 678 679 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES)); 680 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES)); 681 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES)); 682 683 pipefgmres->nextra_vecs = 1; 684 pipefgmres->haptol = 1.0e-30; 685 pipefgmres->q_preallocate = 0; 686 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 687 pipefgmres->orthog = NULL; 688 pipefgmres->nrs = NULL; 689 pipefgmres->sol_temp = NULL; 690 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 691 pipefgmres->Rsvd = NULL; 692 pipefgmres->orthogwork = NULL; 693 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 694 pipefgmres->shift = 1.0; 695 PetscFunctionReturn(PETSC_SUCCESS); 696 } 697 698 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it) 699 { 700 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 701 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 702 PetscInt nalloc; /* number to allocate */ 703 PetscInt k; 704 705 PetscFunctionBegin; 706 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 707 in a single chunk */ 708 709 /* Adjust the number to allocate to make sure that we don't exceed the 710 number of available slots (pipefgmres->vecs_allocated)*/ 711 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; 712 if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS); 713 714 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 715 716 /* work vectors */ 717 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL)); 718 for (k = 0; k < nalloc; k++) pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k]; 719 /* specify size of chunk allocated */ 720 pipefgmres->mwork_alloc[nwork] = nalloc; 721 722 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 723 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL)); 724 for (k = 0; k < nalloc; k++) pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k]; 725 726 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL)); 727 for (k = 0; k < nalloc; k++) pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k]; 728 729 /* increment the number of work vector chunks */ 730 pipefgmres->nwork_alloc++; 731 PetscFunctionReturn(PETSC_SUCCESS); 732 } 733 734 /*@ 735 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined `KSPPIPEFGMRES` solver. 736 737 Logically Collective 738 739 Input Parameters: 740 + ksp - the Krylov space context 741 - shift - the shift 742 743 Options Database Key: 744 . -ksp_pipefgmres_shift <shift> - set the shift parameter 745 746 Level: intermediate 747 748 Note: 749 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. 750 This can be achieved with PETSc itself by using a few iterations of a Krylov method. 751 See `KSPComputeEigenvalues()` (and note the caveats there). 752 753 .seealso: [](ch_ksp), `KSPPIPEFGMRES`, `KSPComputeEigenvalues()` 754 @*/ 755 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift) 756 { 757 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 758 759 PetscFunctionBegin; 760 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1); 761 PetscValidLogicalCollectiveScalar(ksp, shift, 2); 762 pipefgmres->shift = shift; 763 PetscFunctionReturn(PETSC_SUCCESS); 764 } 765