1 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 2 3 static PetscBool cited = PETSC_FALSE; 4 static const char citation[] = "@article{SSM2016,\n" 5 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n" 6 " title = {Pipelined, Flexible Krylov Subspace Methods},\n" 7 " journal = {SIAM Journal on Scientific Computing},\n" 8 " volume = {38},\n" 9 " number = {5},\n" 10 " pages = {C441-C470},\n" 11 " year = {2016},\n" 12 " doi = {10.1137/15M1049130},\n" 13 " URL = {http://dx.doi.org/10.1137/15M1049130},\n" 14 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n" 15 "}\n"; 16 17 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt); 18 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *); 19 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt); 20 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 21 22 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) 23 { 24 PetscInt k; 25 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 26 const PetscInt max_k = pipefgmres->max_k; 27 28 PetscFunctionBegin; 29 PetscCall(KSPSetUp_GMRES(ksp)); 30 31 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs)); 32 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs_user_work)); 33 34 PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL)); 35 for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; 36 37 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs)); 38 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs_user_work)); 39 40 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->redux)); 41 42 PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL)); 43 for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; 44 PetscFunctionReturn(PETSC_SUCCESS); 45 } 46 47 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp) 48 { 49 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 50 PetscReal res_norm; 51 PetscReal hapbnd, tt; 52 PetscScalar *hh, *hes, *lhh, shift = pipefgmres->shift; 53 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 54 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 55 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 56 PetscInt i, j, k; 57 Mat Amat, Pmat; 58 Vec Q, W; /* Pipelining vectors */ 59 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 60 61 PetscFunctionBegin; 62 if (itcount) *itcount = 0; 63 64 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 65 Q = VEC_Q; 66 W = VEC_W; 67 68 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 69 /* Note that we add an extra value here to allow for a single reduction */ 70 if (!pipefgmres->orthogwork) PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork)); 71 lhh = pipefgmres->orthogwork; 72 73 /* Number of pseudo iterations since last restart is the number 74 of prestart directions */ 75 loc_it = 0; 76 77 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 78 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 79 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 80 (loc_it -1) is passed, so the two are equivalent */ 81 pipefgmres->it = (loc_it - 1); 82 83 /* initial residual is in VEC_VV(0) - compute its norm*/ 84 PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm)); 85 86 /* first entry in the right-hand side of the Hessenberg system is just 87 the initial residual norm */ 88 *RS(0) = res_norm; 89 90 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 91 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 92 else ksp->rnorm = 0; 93 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 94 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 95 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); 96 97 /* check for the convergence - maybe the current guess is good enough */ 98 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 99 if (ksp->reason) { 100 if (itcount) *itcount = 0; 101 PetscFunctionReturn(PETSC_SUCCESS); 102 } 103 104 /* scale VEC_VV (the initial residual) */ 105 PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm)); 106 107 /* Fill the pipeline */ 108 PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it))); 109 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat)); 110 PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it))); 111 PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */ 112 113 /* MAIN ITERATION LOOP BEGINNING*/ 114 /* keep iterating until we have converged OR generated the max number 115 of directions OR reached the max number of iterations for the method */ 116 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 117 if (loc_it) { 118 PetscCall(KSPLogResidualHistory(ksp, res_norm)); 119 PetscCall(KSPMonitor(ksp, ksp->its, res_norm)); 120 } 121 pipefgmres->it = (loc_it - 1); 122 123 /* see if more space is needed for work vectors */ 124 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 125 PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1)); 126 /* (loc_it+1) is passed in as number of the first vector that should be allocated */ 127 } 128 129 /* Note that these inner products are with "Z" now, so 130 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 131 not the value from the equivalent FGMRES run (even in exact arithmetic) 132 That is, the H we need for the Arnoldi relation is different from the 133 coefficients we use in the orthogonalization process,because of the shift */ 134 135 /* Do some local twiddling to allow for a single reduction */ 136 for (i = 0; i < loc_it + 1; i++) redux[i] = VEC_VV(i); 137 redux[loc_it + 1] = ZVEC(loc_it); 138 139 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 140 PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh)); 141 142 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 143 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)))); 144 145 /* The work to be overlapped with the inner products follows. 146 This is application of the preconditioner and the operator 147 to compute intermediate quantities which will be combined (locally) 148 with the results of the inner products. 149 */ 150 PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q)); 151 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat)); 152 PetscCall(KSP_MatMult(ksp, Amat, Q, W)); 153 154 /* Compute inner products of the new direction with previous directions, 155 and the norm of the to-be-orthogonalized direction "Z". 156 This information is enough to build the required entries 157 of H. The inner product with VEC_VV(it_loc) is 158 *different* than in the standard FGMRES and need to be dealt with specially. 159 That is, for standard FGMRES the orthogonalization coefficients are the same 160 as the coefficients used in the Arnoldi relation to reconstruct, but here this 161 is not true (albeit only for the one entry of H which we "unshift" below. */ 162 163 /* Finish the dot product, retrieving the extra entry */ 164 PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh)); 165 tt = PetscRealPart(lhh[loc_it + 1]); 166 167 /* Hessenberg entries, and entries for (naive) classical Gram-Schmidt 168 Note that the Hessenberg entries require a shift, as these are for the 169 relation AU = VH, which is wrt unshifted basis vectors */ 170 hh = HH(0, loc_it); 171 hes = HES(0, loc_it); 172 for (j = 0; j < loc_it; j++) { 173 hh[j] = lhh[j]; 174 hes[j] = lhh[j]; 175 } 176 hh[loc_it] = lhh[loc_it] + shift; 177 hes[loc_it] = lhh[loc_it] + shift; 178 179 /* we delay applying the shift here */ 180 for (j = 0; j <= loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ } 181 182 /* Compute the norm of the un-normalized new direction using the rearranged formula 183 Note that these are shifted ("barred") quantities */ 184 for (k = 0; k <= loc_it; k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 185 /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */ 186 if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0; 187 if (tt < 0.0) { 188 /* If we detect square root breakdown in the norm, we must restart the algorithm. 189 Here this means we simply break the current loop and reconstruct the solution 190 using the basis we have computed thus far. Note that by breaking immediately, 191 we do not update the iteration count, so computation done in this iteration 192 should be disregarded. 193 */ 194 PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt)); 195 break; 196 } else { 197 tt = PetscSqrtReal(tt); 198 } 199 200 /* new entry in hessenburg is the 2-norm of our new direction */ 201 hh[loc_it + 1] = tt; 202 hes[loc_it + 1] = tt; 203 204 /* The recurred computation for the new direction 205 The division by tt is delayed to the happy breakdown check later 206 Note placement BEFORE the unshift 207 */ 208 PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1))); 209 PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0))); 210 /* (VEC_VV(loc_it+1) is not normalized yet) */ 211 212 /* The recurred computation for the preconditioned vector (u) */ 213 PetscCall(VecCopy(Q, PREVEC(loc_it + 1))); 214 PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0))); 215 if (tt) PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt)); 216 217 /* Unshift an entry in the GS coefficients ("removing the bar") */ 218 lhh[loc_it] -= shift; 219 220 /* The recurred computation for z (Au) 221 Note placement AFTER the "unshift" */ 222 PetscCall(VecCopy(W, ZVEC(loc_it + 1))); 223 PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0))); 224 if (tt) PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt)); 225 226 /* Happy Breakdown Check */ 227 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 228 /* RS(loc_it) contains the res_norm from the last iteration */ 229 hapbnd = PetscMin(pipefgmres->haptol, hapbnd); 230 if (tt > hapbnd) { 231 /* scale new direction by its norm */ 232 PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt)); 233 } else { 234 /* This happens when the solution is exactly reached. */ 235 /* So there is no new direction... */ 236 PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */ 237 hapend = PETSC_TRUE; 238 } 239 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 240 current solution would not be exact if HES was singular. Note that 241 HH non-singular implies that HES is not singular, and HES is guaranteed 242 to be nonsingular when PREVECS are linearly independent and A is 243 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 244 of HES). So we should really add a check to verify that HES is nonsingular.*/ 245 246 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 247 here to check that the Hessenberg matrix is indeed non-singular (since 248 FGMRES does not guarantee this) */ 249 250 /* Now apply rotations to new col of Hessenberg (and right side of system), 251 calculate new rotation, and get new residual norm at the same time*/ 252 PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm)); 253 if (ksp->reason) break; 254 255 loc_it++; 256 pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */ 257 258 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 259 ksp->its++; 260 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 261 else ksp->rnorm = 0; 262 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 263 264 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 265 266 /* Catch error in happy breakdown and signal convergence and break from loop */ 267 if (hapend) { 268 if (!ksp->reason) { 269 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "Reached happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm); 270 ksp->reason = KSP_DIVERGED_BREAKDOWN; 271 break; 272 } 273 } 274 } 275 /* END OF ITERATION LOOP */ 276 277 if (itcount) *itcount = loc_it; 278 279 /* 280 Solve for the "best" coefficients of the Krylov 281 columns, add the solution values together, and possibly unwind the 282 preconditioning from the solution 283 */ 284 285 /* Form the solution (or the solution so far) */ 286 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln properly navigates */ 287 288 PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1)); 289 290 /* 291 Monitor if we know that we will not return for a restart 292 */ 293 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { 294 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); 295 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 296 } 297 PetscFunctionReturn(PETSC_SUCCESS); 298 } 299 300 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) 301 { 302 PetscInt its, itcount; 303 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 304 PetscBool guess_zero = ksp->guess_zero; 305 306 PetscFunctionBegin; 307 /* We have not checked these routines for use with complex numbers. The inner products are likely not defined correctly for that case */ 308 PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars"); 309 310 PetscCall(PetscCitationsRegister(citation, &cited)); 311 312 PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 313 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 314 ksp->its = 0; 315 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 316 317 itcount = 0; 318 ksp->reason = KSP_CONVERGED_ITERATING; 319 while (!ksp->reason) { 320 PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs)); 321 PetscCall(KSPPIPEFGMRESCycle(&its, ksp)); 322 itcount += its; 323 if (itcount >= ksp->max_it) { 324 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 325 break; 326 } 327 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 328 } 329 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 330 PetscFunctionReturn(PETSC_SUCCESS); 331 } 332 333 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) 334 { 335 PetscFunctionBegin; 336 PetscCall(KSPReset_PIPEFGMRES(ksp)); 337 PetscCall(KSPDestroy_GMRES(ksp)); 338 PetscFunctionReturn(PETSC_SUCCESS); 339 } 340 341 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it) 342 { 343 PetscScalar tt; 344 PetscInt k, j; 345 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 346 347 PetscFunctionBegin; 348 if (it < 0) { /* no pipefgmres steps have been performed */ 349 PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */ 350 PetscFunctionReturn(PETSC_SUCCESS); 351 } 352 353 /* Solve for solution vector that minimizes the residual */ 354 /* solve the upper triangular system - RS is the right side and HH is 355 the upper triangular matrix - put soln in nrs */ 356 if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it); 357 else nrs[it] = 0.0; 358 359 for (k = it - 1; k >= 0; k--) { 360 tt = *RS(k); 361 for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j]; 362 nrs[k] = tt / *HH(k, k); 363 } 364 365 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 366 PetscCall(VecMAXPBY(VEC_TEMP, it + 1, nrs, 0, &PREVEC(0))); 367 368 /* add solution to previous solution */ 369 if (vdest == vguess) { 370 PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP)); 371 } else { 372 PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess)); 373 } 374 PetscFunctionReturn(PETSC_SUCCESS); 375 } 376 377 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res) 378 { 379 PetscScalar *hh, *cc, *ss, *rs; 380 PetscInt j; 381 PetscReal hapbnd; 382 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 383 384 PetscFunctionBegin; 385 hh = HH(0, it); /* pointer to beginning of column to update */ 386 cc = CC(0); /* beginning of cosine rotations */ 387 ss = SS(0); /* beginning of sine rotations */ 388 rs = RS(0); /* right-hand side of least squares system */ 389 390 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 391 for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j]; 392 393 /* check for the happy breakdown */ 394 hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol); 395 if (PetscAbsScalar(hh[it + 1]) < hapbnd) { 396 PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it)))); 397 *hapend = PETSC_TRUE; 398 } 399 400 /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */ 401 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 402 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 403 404 for (j = 0; j < it; j++) { 405 PetscScalar hhj = hh[j]; 406 hh[j] = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1]; 407 hh[j + 1] = -ss[j] * hhj + cc[j] * hh[j + 1]; 408 } 409 410 /* 411 compute the new plane rotation, and apply it to: 412 1) the right-hand side of the Hessenberg system (RS) 413 note: it affects RS(it) and RS(it+1) 414 2) the new column of the Hessenberg matrix 415 note: it affects HH(it,it) which is currently pointed to 416 by hh and HH(it+1, it) (*(hh+1)) 417 thus obtaining the updated value of the residual... 418 */ 419 420 /* compute new plane rotation */ 421 422 if (!*hapend) { 423 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1]))); 424 if (delta == 0.0) { 425 ksp->reason = KSP_DIVERGED_NULL; 426 PetscFunctionReturn(PETSC_SUCCESS); 427 } 428 429 cc[it] = hh[it] / delta; /* new cosine value */ 430 ss[it] = hh[it + 1] / delta; /* new sine value */ 431 432 hh[it] = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1]; 433 rs[it + 1] = -ss[it] * rs[it]; 434 rs[it] = PetscConj(cc[it]) * rs[it]; 435 *res = PetscAbsScalar(rs[it + 1]); 436 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 437 another rotation matrix (so RH doesn't change). The new residual is 438 always the new sine term times the residual from last time (RS(it)), 439 but now the new sine rotation would be zero...so the residual should 440 be zero...so we will multiply "zero" by the last residual. This might 441 not be exactly what we want to do here -could just return "zero". */ 442 *res = 0.0; 443 } 444 PetscFunctionReturn(PETSC_SUCCESS); 445 } 446 447 static PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result) 448 { 449 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 450 451 PetscFunctionBegin; 452 if (!ptr) { 453 if (!pipefgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp)); 454 ptr = pipefgmres->sol_temp; 455 } 456 if (!pipefgmres->nrs) { 457 /* allocate the work area */ 458 PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs)); 459 } 460 461 PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it)); 462 if (result) *result = ptr; 463 PetscFunctionReturn(PETSC_SUCCESS); 464 } 465 466 static PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject) 467 { 468 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 469 PetscBool flg; 470 PetscScalar shift; 471 472 PetscFunctionBegin; 473 PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject)); 474 PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options"); 475 PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg)); 476 if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift)); 477 PetscOptionsHeadEnd(); 478 PetscFunctionReturn(PETSC_SUCCESS); 479 } 480 481 static PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer) 482 { 483 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 484 PetscBool iascii, isstring; 485 486 PetscFunctionBegin; 487 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 488 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring)); 489 490 if (iascii) { 491 PetscCall(PetscViewerASCIIPrintf(viewer, " restart=%" PetscInt_FMT "\n", pipefgmres->max_k)); 492 PetscCall(PetscViewerASCIIPrintf(viewer, " happy breakdown tolerance %g\n", (double)pipefgmres->haptol)); 493 #if defined(PETSC_USE_COMPLEX) 494 PetscCall(PetscViewerASCIIPrintf(viewer, " shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift))); 495 #else 496 PetscCall(PetscViewerASCIIPrintf(viewer, " shift=%g\n", (double)pipefgmres->shift)); 497 #endif 498 } else if (isstring) { 499 PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k)); 500 #if defined(PETSC_USE_COMPLEX) 501 PetscCall(PetscViewerStringSPrintf(viewer, " shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift))); 502 #else 503 PetscCall(PetscViewerStringSPrintf(viewer, " shift=%g\n", (double)pipefgmres->shift)); 504 #endif 505 } 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) 510 { 511 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 512 PetscInt i; 513 514 PetscFunctionBegin; 515 PetscCall(PetscFree(pipefgmres->prevecs)); 516 PetscCall(PetscFree(pipefgmres->zvecs)); 517 for (i = 0; i < pipefgmres->nwork_alloc; i++) { 518 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i])); 519 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i])); 520 } 521 PetscCall(PetscFree(pipefgmres->prevecs_user_work)); 522 PetscCall(PetscFree(pipefgmres->zvecs_user_work)); 523 PetscCall(PetscFree(pipefgmres->redux)); 524 PetscCall(KSPReset_GMRES(ksp)); 525 PetscFunctionReturn(PETSC_SUCCESS); 526 } 527 528 /*MC 529 KSPPIPEFGMRES - Implements the Pipelined (1-stage) Flexible Generalized Minimal Residual method {cite}`sananschneppmay2016`. [](sec_pipelineksp). [](sec_flexibleksp) 530 531 Options Database Keys: 532 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 533 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 534 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of vectors are allocated as needed) 535 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See `KSPPIPEFGMRESSetShift()` 536 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 537 538 Level: intermediate 539 540 Notes: 541 Compare to `KSPPGMRES` and `KSPFGMRES` 542 543 This variant is not "explicitly normalized" like `KSPPGMRES`, and requires a shift parameter. 544 545 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 546 547 Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with `KSPFGMRES`). 548 549 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 550 See [](doc_faq_pipelined) 551 552 Developer Note: 553 This class is subclassed off of `KSPGMRES`, see the source code in src/ksp/ksp/impls/gmres for comments on the structure of the code 554 555 Contributed by: 556 P. Sanan and S.M. Schnepp 557 558 .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), [](sec_flexibleksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES` 559 `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()` 560 M*/ 561 562 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) 563 { 564 KSP_PIPEFGMRES *pipefgmres; 565 566 PetscFunctionBegin; 567 PetscCall(PetscNew(&pipefgmres)); 568 569 ksp->data = (void *)pipefgmres; 570 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 571 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 572 ksp->ops->solve = KSPSolve_PIPEFGMRES; 573 ksp->ops->reset = KSPReset_PIPEFGMRES; 574 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 575 ksp->ops->view = KSPView_PIPEFGMRES; 576 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 577 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 578 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 579 580 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3)); 581 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1)); 582 583 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES)); 584 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES)); 585 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES)); 586 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetHapTol_C", KSPGMRESSetHapTol_GMRES)); 587 588 pipefgmres->nextra_vecs = 1; 589 pipefgmres->haptol = 1.0e-30; 590 pipefgmres->q_preallocate = 0; 591 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 592 pipefgmres->orthog = NULL; 593 pipefgmres->nrs = NULL; 594 pipefgmres->sol_temp = NULL; 595 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 596 pipefgmres->Rsvd = NULL; 597 pipefgmres->orthogwork = NULL; 598 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 599 pipefgmres->shift = 1.0; 600 PetscFunctionReturn(PETSC_SUCCESS); 601 } 602 603 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it) 604 { 605 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 606 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 607 PetscInt nalloc; /* number to allocate */ 608 PetscInt k; 609 610 PetscFunctionBegin; 611 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 612 in a single chunk */ 613 614 /* Adjust the number to allocate to make sure that we don't exceed the 615 number of available slots (pipefgmres->vecs_allocated)*/ 616 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; 617 if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS); 618 619 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 620 621 /* work vectors */ 622 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL)); 623 for (k = 0; k < nalloc; k++) pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k]; 624 /* specify size of chunk allocated */ 625 pipefgmres->mwork_alloc[nwork] = nalloc; 626 627 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 628 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL)); 629 for (k = 0; k < nalloc; k++) pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k]; 630 631 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL)); 632 for (k = 0; k < nalloc; k++) pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k]; 633 634 /* increment the number of work vector chunks */ 635 pipefgmres->nwork_alloc++; 636 PetscFunctionReturn(PETSC_SUCCESS); 637 } 638 639 /*@ 640 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined `KSPPIPEFGMRES` solver. 641 642 Logically Collective 643 644 Input Parameters: 645 + ksp - the Krylov space context 646 - shift - the shift 647 648 Options Database Key: 649 . -ksp_pipefgmres_shift <shift> - set the shift parameter 650 651 Level: intermediate 652 653 Note: 654 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. 655 This can be achieved with PETSc itself by using a few iterations of a Krylov method. 656 See `KSPComputeEigenvalues()` (and note the caveats there). 657 658 .seealso: [](ch_ksp), `KSPPIPEFGMRES`, `KSPComputeEigenvalues()` 659 @*/ 660 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift) 661 { 662 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 663 664 PetscFunctionBegin; 665 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1); 666 PetscValidLogicalCollectiveScalar(ksp, shift, 2); 667 pipefgmres->shift = shift; 668 PetscFunctionReturn(PETSC_SUCCESS); 669 } 670