1 /* 2 Contributed by Patrick Sanan and Sascha M. Schnepp 3 */ 4 5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 6 7 static PetscBool cited = PETSC_FALSE; 8 static const char citation[] = "@article{SSM2016,\n" 9 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n" 10 " title = {Pipelined, Flexible Krylov Subspace Methods},\n" 11 " journal = {SIAM Journal on Scientific Computing},\n" 12 " volume = {38},\n" 13 " number = {5},\n" 14 " pages = {C441-C470},\n" 15 " year = {2016},\n" 16 " doi = {10.1137/15M1049130},\n" 17 " URL = {http://dx.doi.org/10.1137/15M1049130},\n" 18 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n" 19 "}\n"; 20 21 #define PIPEFGMRES_DELTA_DIRECTIONS 10 22 #define PIPEFGMRES_DEFAULT_MAXK 30 23 24 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt); 25 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *); 26 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt); 27 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 28 29 /* 30 31 KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres. 32 33 This is called once, usually automatically by KSPSolve() or KSPSetUp(), 34 but can be called directly by KSPSetUp(). 35 36 */ 37 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) { 38 PetscInt k; 39 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 40 const PetscInt max_k = pipefgmres->max_k; 41 42 PetscFunctionBegin; 43 PetscCall(KSPSetUp_GMRES(ksp)); 44 45 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs)); 46 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs_user_work)); 47 PetscCall(PetscLogObjectMemory((PetscObject)ksp, (VEC_OFFSET + max_k) * (2 * sizeof(void *)))); 48 49 PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL)); 50 PetscCall(PetscLogObjectParents(ksp, pipefgmres->vv_allocated, pipefgmres->prevecs_user_work[0])); 51 for (k = 0; k < pipefgmres->vv_allocated; k++) { pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; } 52 53 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs)); 54 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs_user_work)); 55 PetscCall(PetscLogObjectMemory((PetscObject)ksp, (VEC_OFFSET + max_k) * (2 * sizeof(void *)))); 56 57 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->redux)); 58 PetscCall(PetscLogObjectMemory((PetscObject)ksp, (VEC_OFFSET + max_k) * (sizeof(void *)))); 59 60 PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL)); 61 PetscCall(PetscLogObjectParents(ksp, pipefgmres->vv_allocated, pipefgmres->zvecs_user_work[0])); 62 for (k = 0; k < pipefgmres->vv_allocated; k++) { pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; } 63 64 PetscFunctionReturn(0); 65 } 66 67 /* 68 69 KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart. Return residual 70 history if requested. 71 72 input parameters: 73 . pipefgmres - structure containing parameters and work areas 74 75 output parameters: 76 . itcount - number of iterations used. If null, ignored. 77 . converged - 0 if not converged 78 79 Notes: 80 On entry, the value in vector VEC_VV(0) should be 81 the initial residual. 82 83 */ 84 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp) { 85 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 86 PetscReal res_norm; 87 PetscReal hapbnd, tt; 88 PetscScalar *hh, *hes, *lhh, shift = pipefgmres->shift; 89 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 90 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 91 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 92 PetscInt i, j, k; 93 Mat Amat, Pmat; 94 Vec Q, W; /* Pipelining vectors */ 95 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 96 97 PetscFunctionBegin; 98 if (itcount) *itcount = 0; 99 100 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 101 Q = VEC_Q; 102 W = VEC_W; 103 104 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 105 /* Note that we add an extra value here to allow for a single reduction */ 106 if (!pipefgmres->orthogwork) { PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork)); } 107 lhh = pipefgmres->orthogwork; 108 109 /* Number of pseudo iterations since last restart is the number 110 of prestart directions */ 111 loc_it = 0; 112 113 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 114 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 115 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 116 (loc_it -1) is passed, so the two are equivalent */ 117 pipefgmres->it = (loc_it - 1); 118 119 /* initial residual is in VEC_VV(0) - compute its norm*/ 120 PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm)); 121 122 /* first entry in right-hand-side of hessenberg system is just 123 the initial residual norm */ 124 *RS(0) = res_norm; 125 126 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 127 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 128 else ksp->rnorm = 0; 129 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 130 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 131 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); 132 133 /* check for the convergence - maybe the current guess is good enough */ 134 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 135 if (ksp->reason) { 136 if (itcount) *itcount = 0; 137 PetscFunctionReturn(0); 138 } 139 140 /* scale VEC_VV (the initial residual) */ 141 PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm)); 142 143 /* Fill the pipeline */ 144 PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it))); 145 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat)); 146 PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it))); 147 PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */ 148 149 /* MAIN ITERATION LOOP BEGINNING*/ 150 /* keep iterating until we have converged OR generated the max number 151 of directions OR reached the max number of iterations for the method */ 152 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 153 if (loc_it) { 154 PetscCall(KSPLogResidualHistory(ksp, res_norm)); 155 PetscCall(KSPMonitor(ksp, ksp->its, res_norm)); 156 } 157 pipefgmres->it = (loc_it - 1); 158 159 /* see if more space is needed for work vectors */ 160 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 161 PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1)); 162 /* (loc_it+1) is passed in as number of the first vector that should 163 be allocated */ 164 } 165 166 /* Note that these inner products are with "Z" now, so 167 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 168 not the value from the equivalent FGMRES run (even in exact arithmetic) 169 That is, the H we need for the Arnoldi relation is different from the 170 coefficients we use in the orthogonalization process,because of the shift */ 171 172 /* Do some local twiddling to allow for a single reduction */ 173 for (i = 0; i < loc_it + 1; i++) { redux[i] = VEC_VV(i); } 174 redux[loc_it + 1] = ZVEC(loc_it); 175 176 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 177 PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh)); 178 179 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 180 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)))); 181 182 /* The work to be overlapped with the inner products follows. 183 This is application of the preconditioner and the operator 184 to compute intermediate quantites which will be combined (locally) 185 with the results of the inner products. 186 */ 187 PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q)); 188 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat)); 189 PetscCall(KSP_MatMult(ksp, Amat, Q, W)); 190 191 /* Compute inner products of the new direction with previous directions, 192 and the norm of the to-be-orthogonalized direction "Z". 193 This information is enough to build the required entries 194 of H. The inner product with VEC_VV(it_loc) is 195 *different* than in the standard FGMRES and need to be dealt with specially. 196 That is, for standard FGMRES the orthogonalization coefficients are the same 197 as the coefficients used in the Arnoldi relation to reconstruct, but here this 198 is not true (albeit only for the one entry of H which we "unshift" below. */ 199 200 /* Finish the dot product, retrieving the extra entry */ 201 PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh)); 202 tt = PetscRealPart(lhh[loc_it + 1]); 203 204 /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt 205 Note that the Hessenberg entries require a shift, as these are for the 206 relation AU = VH, which is wrt unshifted basis vectors */ 207 hh = HH(0, loc_it); 208 hes = HES(0, loc_it); 209 for (j = 0; j < loc_it; j++) { 210 hh[j] = lhh[j]; 211 hes[j] = lhh[j]; 212 } 213 hh[loc_it] = lhh[loc_it] + shift; 214 hes[loc_it] = lhh[loc_it] + shift; 215 216 /* we delay applying the shift here */ 217 for (j = 0; j <= loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ } 218 219 /* Compute the norm of the un-normalized new direction using the rearranged formula 220 Note that these are shifted ("barred") quantities */ 221 for (k = 0; k <= loc_it; k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 222 /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */ 223 if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0; 224 if (tt < 0.0) { 225 /* If we detect square root breakdown in the norm, we must restart the algorithm. 226 Here this means we simply break the current loop and reconstruct the solution 227 using the basis we have computed thus far. Note that by breaking immediately, 228 we do not update the iteration count, so computation done in this iteration 229 should be disregarded. 230 */ 231 PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt)); 232 break; 233 } else { 234 tt = PetscSqrtReal(tt); 235 } 236 237 /* new entry in hessenburg is the 2-norm of our new direction */ 238 hh[loc_it + 1] = tt; 239 hes[loc_it + 1] = tt; 240 241 /* The recurred computation for the new direction 242 The division by tt is delayed to the happy breakdown check later 243 Note placement BEFORE the unshift 244 */ 245 PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1))); 246 PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0))); 247 /* (VEC_VV(loc_it+1) is not normalized yet) */ 248 249 /* The recurred computation for the preconditioned vector (u) */ 250 PetscCall(VecCopy(Q, PREVEC(loc_it + 1))); 251 PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0))); 252 PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt)); 253 254 /* Unshift an entry in the GS coefficients ("removing the bar") */ 255 lhh[loc_it] -= shift; 256 257 /* The recurred computation for z (Au) 258 Note placement AFTER the "unshift" */ 259 PetscCall(VecCopy(W, ZVEC(loc_it + 1))); 260 PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0))); 261 PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt)); 262 263 /* Happy Breakdown Check */ 264 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 265 /* RS(loc_it) contains the res_norm from the last iteration */ 266 hapbnd = PetscMin(pipefgmres->haptol, hapbnd); 267 if (tt > hapbnd) { 268 /* scale new direction by its norm */ 269 PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt)); 270 } else { 271 /* This happens when the solution is exactly reached. */ 272 /* So there is no new direction... */ 273 PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */ 274 hapend = PETSC_TRUE; 275 } 276 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 277 current solution would not be exact if HES was singular. Note that 278 HH non-singular implies that HES is not singular, and HES is guaranteed 279 to be nonsingular when PREVECS are linearly independent and A is 280 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 281 of HES). So we should really add a check to verify that HES is nonsingular.*/ 282 283 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 284 here to check that the hessenberg matrix is indeed non-singular (since 285 FGMRES does not guarantee this) */ 286 287 /* Now apply rotations to new col of hessenberg (and right side of system), 288 calculate new rotation, and get new residual norm at the same time*/ 289 PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm)); 290 if (ksp->reason) break; 291 292 loc_it++; 293 pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */ 294 295 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 296 ksp->its++; 297 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 298 else ksp->rnorm = 0; 299 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 300 301 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 302 303 /* Catch error in happy breakdown and signal convergence and break from loop */ 304 if (hapend) { 305 if (!ksp->reason) { 306 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "You reached the happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm); 307 ksp->reason = KSP_DIVERGED_BREAKDOWN; 308 break; 309 } 310 } 311 } 312 /* END OF ITERATION LOOP */ 313 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 314 315 /* 316 Monitor if we know that we will not return for a restart */ 317 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); } 318 319 if (itcount) *itcount = loc_it; 320 321 /* 322 Down here we have to solve for the "best" coefficients of the Krylov 323 columns, add the solution values together, and possibly unwind the 324 preconditioning from the solution 325 */ 326 327 /* Form the solution (or the solution so far) */ 328 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln 329 properly navigates */ 330 331 PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1)); 332 333 PetscFunctionReturn(0); 334 } 335 336 /* 337 KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method. 338 339 Input Parameter: 340 . ksp - the Krylov space object that was set to use pipefgmres 341 342 Output Parameter: 343 . outits - number of iterations used 344 345 */ 346 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) { 347 PetscInt its, itcount; 348 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 349 PetscBool guess_zero = ksp->guess_zero; 350 351 PetscFunctionBegin; 352 /* We have not checked these routines for use with complex numbers. The inner products 353 are likely not defined correctly for that case */ 354 PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars"); 355 356 PetscCall(PetscCitationsRegister(citation, &cited)); 357 358 PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 359 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 360 ksp->its = 0; 361 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 362 363 itcount = 0; 364 ksp->reason = KSP_CONVERGED_ITERATING; 365 while (!ksp->reason) { 366 PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs)); 367 PetscCall(KSPPIPEFGMRESCycle(&its, ksp)); 368 itcount += its; 369 if (itcount >= ksp->max_it) { 370 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 371 break; 372 } 373 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 374 } 375 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 376 PetscFunctionReturn(0); 377 } 378 379 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) { 380 PetscFunctionBegin; 381 PetscCall(KSPReset_PIPEFGMRES(ksp)); 382 PetscCall(KSPDestroy_GMRES(ksp)); 383 PetscFunctionReturn(0); 384 } 385 386 /* 387 KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the 388 current iterates. 389 390 Input parameters: 391 nrs - work area of size it + 1. 392 vguess - index of initial guess 393 vdest - index of result. Note that vguess may == vdest (replace 394 guess with the solution). 395 it - HH upper triangular part is a block of size (it+1) x (it+1) 396 397 This is an internal routine that knows about the PIPEFGMRES internals. 398 */ 399 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it) { 400 PetscScalar tt; 401 PetscInt k, j; 402 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 403 404 PetscFunctionBegin; 405 /* Solve for solution vector that minimizes the residual */ 406 407 if (it < 0) { /* no pipefgmres steps have been performed */ 408 PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */ 409 PetscFunctionReturn(0); 410 } 411 412 /* solve the upper triangular system - RS is the right side and HH is 413 the upper triangular matrix - put soln in nrs */ 414 if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it); 415 else nrs[it] = 0.0; 416 417 for (k = it - 1; k >= 0; k--) { 418 tt = *RS(k); 419 for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j]; 420 nrs[k] = tt / *HH(k, k); 421 } 422 423 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 424 PetscCall(VecZeroEntries(VEC_TEMP)); 425 PetscCall(VecMAXPY(VEC_TEMP, it + 1, nrs, &PREVEC(0))); 426 427 /* add solution to previous solution */ 428 if (vdest == vguess) { 429 PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP)); 430 } else { 431 PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess)); 432 } 433 PetscFunctionReturn(0); 434 } 435 436 /* 437 438 KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization. 439 Return new residual. 440 441 input parameters: 442 443 . ksp - Krylov space object 444 . it - plane rotations are applied to the (it+1)th column of the 445 modified hessenberg (i.e. HH(:,it)) 446 . hapend - PETSC_FALSE not happy breakdown ending. 447 448 output parameters: 449 . res - the new residual 450 451 */ 452 /* 453 . it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this 454 */ 455 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res) { 456 PetscScalar *hh, *cc, *ss, *rs; 457 PetscInt j; 458 PetscReal hapbnd; 459 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 460 461 PetscFunctionBegin; 462 hh = HH(0, it); /* pointer to beginning of column to update */ 463 cc = CC(0); /* beginning of cosine rotations */ 464 ss = SS(0); /* beginning of sine rotations */ 465 rs = RS(0); /* right hand side of least squares system */ 466 467 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 468 for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j]; 469 470 /* check for the happy breakdown */ 471 hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol); 472 if (PetscAbsScalar(hh[it + 1]) < hapbnd) { 473 PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it)))); 474 *hapend = PETSC_TRUE; 475 } 476 477 /* Apply all the previously computed plane rotations to the new column 478 of the Hessenberg matrix */ 479 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 480 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 481 482 for (j = 0; j < it; j++) { 483 PetscScalar hhj = hh[j]; 484 hh[j] = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1]; 485 hh[j + 1] = -ss[j] * hhj + cc[j] * hh[j + 1]; 486 } 487 488 /* 489 compute the new plane rotation, and apply it to: 490 1) the right-hand-side of the Hessenberg system (RS) 491 note: it affects RS(it) and RS(it+1) 492 2) the new column of the Hessenberg matrix 493 note: it affects HH(it,it) which is currently pointed to 494 by hh and HH(it+1, it) (*(hh+1)) 495 thus obtaining the updated value of the residual... 496 */ 497 498 /* compute new plane rotation */ 499 500 if (!*hapend) { 501 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1]))); 502 if (delta == 0.0) { 503 ksp->reason = KSP_DIVERGED_NULL; 504 PetscFunctionReturn(0); 505 } 506 507 cc[it] = hh[it] / delta; /* new cosine value */ 508 ss[it] = hh[it + 1] / delta; /* new sine value */ 509 510 hh[it] = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1]; 511 rs[it + 1] = -ss[it] * rs[it]; 512 rs[it] = PetscConj(cc[it]) * rs[it]; 513 *res = PetscAbsScalar(rs[it + 1]); 514 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 515 another rotation matrix (so RH doesn't change). The new residual is 516 always the new sine term times the residual from last time (RS(it)), 517 but now the new sine rotation would be zero...so the residual should 518 be zero...so we will multiply "zero" by the last residual. This might 519 not be exactly what we want to do here -could just return "zero". */ 520 521 *res = 0.0; 522 } 523 PetscFunctionReturn(0); 524 } 525 526 /* 527 KSPBuildSolution_PIPEFGMRES 528 529 Input Parameter: 530 . ksp - the Krylov space object 531 . ptr- 532 533 Output Parameter: 534 . result - the solution 535 536 Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle 537 calls directly. 538 539 */ 540 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result) { 541 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 542 543 PetscFunctionBegin; 544 if (!ptr) { 545 if (!pipefgmres->sol_temp) { 546 PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp)); 547 PetscCall(PetscLogObjectParent((PetscObject)ksp, (PetscObject)pipefgmres->sol_temp)); 548 } 549 ptr = pipefgmres->sol_temp; 550 } 551 if (!pipefgmres->nrs) { 552 /* allocate the work area */ 553 PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs)); 554 PetscCall(PetscLogObjectMemory((PetscObject)ksp, pipefgmres->max_k * sizeof(PetscScalar))); 555 } 556 557 PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it)); 558 if (result) *result = ptr; 559 PetscFunctionReturn(0); 560 } 561 562 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject) { 563 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 564 PetscBool flg; 565 PetscScalar shift; 566 567 PetscFunctionBegin; 568 PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject)); 569 PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options"); 570 PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg)); 571 if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift)); 572 PetscOptionsHeadEnd(); 573 PetscFunctionReturn(0); 574 } 575 576 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer) { 577 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 578 PetscBool iascii, isstring; 579 580 PetscFunctionBegin; 581 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 582 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring)); 583 584 if (iascii) { 585 PetscCall(PetscViewerASCIIPrintf(viewer, " restart=%" PetscInt_FMT "\n", pipefgmres->max_k)); 586 PetscCall(PetscViewerASCIIPrintf(viewer, " happy breakdown tolerance %g\n", (double)pipefgmres->haptol)); 587 #if defined(PETSC_USE_COMPLEX) 588 PetscCall(PetscViewerASCIIPrintf(viewer, " shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift))); 589 #else 590 PetscCall(PetscViewerASCIIPrintf(viewer, " shift=%g\n", (double)pipefgmres->shift)); 591 #endif 592 } else if (isstring) { 593 PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k)); 594 #if defined(PETSC_USE_COMPLEX) 595 PetscCall(PetscViewerStringSPrintf(viewer, " shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift))); 596 #else 597 PetscCall(PetscViewerStringSPrintf(viewer, " shift=%g\n", (double)pipefgmres->shift)); 598 #endif 599 } 600 PetscFunctionReturn(0); 601 } 602 603 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) { 604 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 605 PetscInt i; 606 607 PetscFunctionBegin; 608 PetscCall(PetscFree(pipefgmres->prevecs)); 609 PetscCall(PetscFree(pipefgmres->zvecs)); 610 for (i = 0; i < pipefgmres->nwork_alloc; i++) { 611 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i])); 612 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i])); 613 } 614 PetscCall(PetscFree(pipefgmres->prevecs_user_work)); 615 PetscCall(PetscFree(pipefgmres->zvecs_user_work)); 616 PetscCall(PetscFree(pipefgmres->redux)); 617 PetscCall(KSPReset_GMRES(ksp)); 618 PetscFunctionReturn(0); 619 } 620 621 /*MC 622 KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method. 623 624 A flexible, 1-stage pipelined variant of GMRES. 625 626 Options Database Keys: 627 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 628 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 629 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of 630 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift() 631 vectors are allocated as needed) 632 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 633 634 Level: intermediate 635 636 Notes: 637 638 This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter. 639 640 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 641 642 Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES). 643 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 644 See the FAQ on the PETSc website for details. 645 646 Developer Notes: 647 This class is subclassed off of KSPGMRES. 648 649 Reference: 650 P. Sanan, S.M. Schnepp, and D.A. May, 651 "Pipelined, Flexible Krylov Subspace Methods," 652 SIAM Journal on Scientific Computing 2016 38:5, C441-C470, 653 DOI: 10.1137/15M1049130 654 655 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES` 656 `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()` 657 M*/ 658 659 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) { 660 KSP_PIPEFGMRES *pipefgmres; 661 662 PetscFunctionBegin; 663 PetscCall(PetscNewLog(ksp, &pipefgmres)); 664 665 ksp->data = (void *)pipefgmres; 666 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 667 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 668 ksp->ops->solve = KSPSolve_PIPEFGMRES; 669 ksp->ops->reset = KSPReset_PIPEFGMRES; 670 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 671 ksp->ops->view = KSPView_PIPEFGMRES; 672 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 673 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 674 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 675 676 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3)); 677 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1)); 678 679 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES)); 680 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES)); 681 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES)); 682 683 pipefgmres->nextra_vecs = 1; 684 pipefgmres->haptol = 1.0e-30; 685 pipefgmres->q_preallocate = 0; 686 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 687 pipefgmres->orthog = NULL; 688 pipefgmres->nrs = NULL; 689 pipefgmres->sol_temp = NULL; 690 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 691 pipefgmres->Rsvd = NULL; 692 pipefgmres->orthogwork = NULL; 693 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 694 pipefgmres->shift = 1.0; 695 PetscFunctionReturn(0); 696 } 697 698 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it) { 699 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 700 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 701 PetscInt nalloc; /* number to allocate */ 702 PetscInt k; 703 704 PetscFunctionBegin; 705 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 706 in a single chunk */ 707 708 /* Adjust the number to allocate to make sure that we don't exceed the 709 number of available slots (pipefgmres->vecs_allocated)*/ 710 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) { nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; } 711 if (!nalloc) PetscFunctionReturn(0); 712 713 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 714 715 /* work vectors */ 716 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL)); 717 PetscCall(PetscLogObjectParents(ksp, nalloc, pipefgmres->user_work[nwork])); 718 for (k = 0; k < nalloc; k++) { pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k]; } 719 /* specify size of chunk allocated */ 720 pipefgmres->mwork_alloc[nwork] = nalloc; 721 722 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 723 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL)); 724 PetscCall(PetscLogObjectParents(ksp, nalloc, pipefgmres->prevecs_user_work[nwork])); 725 for (k = 0; k < nalloc; k++) { pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k]; } 726 727 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL)); 728 PetscCall(PetscLogObjectParents(ksp, nalloc, pipefgmres->zvecs_user_work[nwork])); 729 for (k = 0; k < nalloc; k++) { pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k]; } 730 731 /* increment the number of work vector chunks */ 732 pipefgmres->nwork_alloc++; 733 PetscFunctionReturn(0); 734 } 735 736 /*@ 737 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver. 738 739 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there). 740 741 Logically Collective on ksp 742 743 Input Parameters: 744 + ksp - the Krylov space context 745 - shift - the shift 746 747 Level: intermediate 748 749 Options Database: 750 . -ksp_pipefgmres_shift <shift> - set the shift parameter 751 752 .seealso: `KSPComputeEigenvalues()` 753 @*/ 754 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift) { 755 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 756 757 PetscFunctionBegin; 758 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1); 759 PetscValidLogicalCollectiveScalar(ksp, shift, 2); 760 pipefgmres->shift = shift; 761 PetscFunctionReturn(0); 762 } 763