1 2 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 3 4 static PetscBool cited = PETSC_FALSE; 5 static const char citation[] = "@article{SSM2016,\n" 6 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n" 7 " title = {Pipelined, Flexible Krylov Subspace Methods},\n" 8 " journal = {SIAM Journal on Scientific Computing},\n" 9 " volume = {38},\n" 10 " number = {5},\n" 11 " pages = {C441-C470},\n" 12 " year = {2016},\n" 13 " doi = {10.1137/15M1049130},\n" 14 " URL = {http://dx.doi.org/10.1137/15M1049130},\n" 15 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n" 16 "}\n"; 17 18 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt); 19 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *); 20 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt); 21 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 22 23 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) 24 { 25 PetscInt k; 26 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 27 const PetscInt max_k = pipefgmres->max_k; 28 29 PetscFunctionBegin; 30 PetscCall(KSPSetUp_GMRES(ksp)); 31 32 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs)); 33 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs_user_work)); 34 35 PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL)); 36 for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; 37 38 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs)); 39 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs_user_work)); 40 41 PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->redux)); 42 43 PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL)); 44 for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; 45 46 PetscFunctionReturn(PETSC_SUCCESS); 47 } 48 49 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp) 50 { 51 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 52 PetscReal res_norm; 53 PetscReal hapbnd, tt; 54 PetscScalar *hh, *hes, *lhh, shift = pipefgmres->shift; 55 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 56 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 57 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 58 PetscInt i, j, k; 59 Mat Amat, Pmat; 60 Vec Q, W; /* Pipelining vectors */ 61 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 62 63 PetscFunctionBegin; 64 if (itcount) *itcount = 0; 65 66 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 67 Q = VEC_Q; 68 W = VEC_W; 69 70 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 71 /* Note that we add an extra value here to allow for a single reduction */ 72 if (!pipefgmres->orthogwork) PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork)); 73 lhh = pipefgmres->orthogwork; 74 75 /* Number of pseudo iterations since last restart is the number 76 of prestart directions */ 77 loc_it = 0; 78 79 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 80 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 81 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 82 (loc_it -1) is passed, so the two are equivalent */ 83 pipefgmres->it = (loc_it - 1); 84 85 /* initial residual is in VEC_VV(0) - compute its norm*/ 86 PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm)); 87 88 /* first entry in right-hand-side of hessenberg system is just 89 the initial residual norm */ 90 *RS(0) = res_norm; 91 92 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 93 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 94 else ksp->rnorm = 0; 95 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 96 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 97 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); 98 99 /* check for the convergence - maybe the current guess is good enough */ 100 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 101 if (ksp->reason) { 102 if (itcount) *itcount = 0; 103 PetscFunctionReturn(PETSC_SUCCESS); 104 } 105 106 /* scale VEC_VV (the initial residual) */ 107 PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm)); 108 109 /* Fill the pipeline */ 110 PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it))); 111 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat)); 112 PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it))); 113 PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */ 114 115 /* MAIN ITERATION LOOP BEGINNING*/ 116 /* keep iterating until we have converged OR generated the max number 117 of directions OR reached the max number of iterations for the method */ 118 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 119 if (loc_it) { 120 PetscCall(KSPLogResidualHistory(ksp, res_norm)); 121 PetscCall(KSPMonitor(ksp, ksp->its, res_norm)); 122 } 123 pipefgmres->it = (loc_it - 1); 124 125 /* see if more space is needed for work vectors */ 126 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 127 PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1)); 128 /* (loc_it+1) is passed in as number of the first vector that should be allocated */ 129 } 130 131 /* Note that these inner products are with "Z" now, so 132 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 133 not the value from the equivalent FGMRES run (even in exact arithmetic) 134 That is, the H we need for the Arnoldi relation is different from the 135 coefficients we use in the orthogonalization process,because of the shift */ 136 137 /* Do some local twiddling to allow for a single reduction */ 138 for (i = 0; i < loc_it + 1; i++) redux[i] = VEC_VV(i); 139 redux[loc_it + 1] = ZVEC(loc_it); 140 141 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 142 PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh)); 143 144 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 145 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)))); 146 147 /* The work to be overlapped with the inner products follows. 148 This is application of the preconditioner and the operator 149 to compute intermediate quantites which will be combined (locally) 150 with the results of the inner products. 151 */ 152 PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q)); 153 PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat)); 154 PetscCall(KSP_MatMult(ksp, Amat, Q, W)); 155 156 /* Compute inner products of the new direction with previous directions, 157 and the norm of the to-be-orthogonalized direction "Z". 158 This information is enough to build the required entries 159 of H. The inner product with VEC_VV(it_loc) is 160 *different* than in the standard FGMRES and need to be dealt with specially. 161 That is, for standard FGMRES the orthogonalization coefficients are the same 162 as the coefficients used in the Arnoldi relation to reconstruct, but here this 163 is not true (albeit only for the one entry of H which we "unshift" below. */ 164 165 /* Finish the dot product, retrieving the extra entry */ 166 PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh)); 167 tt = PetscRealPart(lhh[loc_it + 1]); 168 169 /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt 170 Note that the Hessenberg entries require a shift, as these are for the 171 relation AU = VH, which is wrt unshifted basis vectors */ 172 hh = HH(0, loc_it); 173 hes = HES(0, loc_it); 174 for (j = 0; j < loc_it; j++) { 175 hh[j] = lhh[j]; 176 hes[j] = lhh[j]; 177 } 178 hh[loc_it] = lhh[loc_it] + shift; 179 hes[loc_it] = lhh[loc_it] + shift; 180 181 /* we delay applying the shift here */ 182 for (j = 0; j <= loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ } 183 184 /* Compute the norm of the un-normalized new direction using the rearranged formula 185 Note that these are shifted ("barred") quantities */ 186 for (k = 0; k <= loc_it; k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 187 /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */ 188 if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0; 189 if (tt < 0.0) { 190 /* If we detect square root breakdown in the norm, we must restart the algorithm. 191 Here this means we simply break the current loop and reconstruct the solution 192 using the basis we have computed thus far. Note that by breaking immediately, 193 we do not update the iteration count, so computation done in this iteration 194 should be disregarded. 195 */ 196 PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt)); 197 break; 198 } else { 199 tt = PetscSqrtReal(tt); 200 } 201 202 /* new entry in hessenburg is the 2-norm of our new direction */ 203 hh[loc_it + 1] = tt; 204 hes[loc_it + 1] = tt; 205 206 /* The recurred computation for the new direction 207 The division by tt is delayed to the happy breakdown check later 208 Note placement BEFORE the unshift 209 */ 210 PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1))); 211 PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0))); 212 /* (VEC_VV(loc_it+1) is not normalized yet) */ 213 214 /* The recurred computation for the preconditioned vector (u) */ 215 PetscCall(VecCopy(Q, PREVEC(loc_it + 1))); 216 PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0))); 217 if (tt) PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt)); 218 219 /* Unshift an entry in the GS coefficients ("removing the bar") */ 220 lhh[loc_it] -= shift; 221 222 /* The recurred computation for z (Au) 223 Note placement AFTER the "unshift" */ 224 PetscCall(VecCopy(W, ZVEC(loc_it + 1))); 225 PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0))); 226 if (tt) PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt)); 227 228 /* Happy Breakdown Check */ 229 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 230 /* RS(loc_it) contains the res_norm from the last iteration */ 231 hapbnd = PetscMin(pipefgmres->haptol, hapbnd); 232 if (tt > hapbnd) { 233 /* scale new direction by its norm */ 234 PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt)); 235 } else { 236 /* This happens when the solution is exactly reached. */ 237 /* So there is no new direction... */ 238 PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */ 239 hapend = PETSC_TRUE; 240 } 241 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 242 current solution would not be exact if HES was singular. Note that 243 HH non-singular implies that HES is not singular, and HES is guaranteed 244 to be nonsingular when PREVECS are linearly independent and A is 245 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 246 of HES). So we should really add a check to verify that HES is nonsingular.*/ 247 248 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 249 here to check that the hessenberg matrix is indeed non-singular (since 250 FGMRES does not guarantee this) */ 251 252 /* Now apply rotations to new col of hessenberg (and right side of system), 253 calculate new rotation, and get new residual norm at the same time*/ 254 PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm)); 255 if (ksp->reason) break; 256 257 loc_it++; 258 pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */ 259 260 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 261 ksp->its++; 262 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 263 else ksp->rnorm = 0; 264 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 265 266 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 267 268 /* Catch error in happy breakdown and signal convergence and break from loop */ 269 if (hapend) { 270 if (!ksp->reason) { 271 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "Reached happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm); 272 ksp->reason = KSP_DIVERGED_BREAKDOWN; 273 break; 274 } 275 } 276 } 277 /* END OF ITERATION LOOP */ 278 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 279 280 /* 281 Monitor if we know that we will not return for a restart */ 282 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); 283 284 if (itcount) *itcount = loc_it; 285 286 /* 287 Solve for the "best" coefficients of the Krylov 288 columns, add the solution values together, and possibly unwind the 289 preconditioning from the solution 290 */ 291 292 /* Form the solution (or the solution so far) */ 293 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln properly navigates */ 294 295 PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1)); 296 PetscFunctionReturn(PETSC_SUCCESS); 297 } 298 299 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) 300 { 301 PetscInt its, itcount; 302 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 303 PetscBool guess_zero = ksp->guess_zero; 304 305 PetscFunctionBegin; 306 /* We have not checked these routines for use with complex numbers. The inner products are likely not defined correctly for that case */ 307 PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars"); 308 309 PetscCall(PetscCitationsRegister(citation, &cited)); 310 311 PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 312 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 313 ksp->its = 0; 314 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 315 316 itcount = 0; 317 ksp->reason = KSP_CONVERGED_ITERATING; 318 while (!ksp->reason) { 319 PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs)); 320 PetscCall(KSPPIPEFGMRESCycle(&its, ksp)); 321 itcount += its; 322 if (itcount >= ksp->max_it) { 323 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 324 break; 325 } 326 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 327 } 328 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 329 PetscFunctionReturn(PETSC_SUCCESS); 330 } 331 332 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) 333 { 334 PetscFunctionBegin; 335 PetscCall(KSPReset_PIPEFGMRES(ksp)); 336 PetscCall(KSPDestroy_GMRES(ksp)); 337 PetscFunctionReturn(PETSC_SUCCESS); 338 } 339 340 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it) 341 { 342 PetscScalar tt; 343 PetscInt k, j; 344 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 345 346 PetscFunctionBegin; 347 if (it < 0) { /* no pipefgmres steps have been performed */ 348 PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */ 349 PetscFunctionReturn(PETSC_SUCCESS); 350 } 351 352 /* Solve for solution vector that minimizes the residual */ 353 /* solve the upper triangular system - RS is the right side and HH is 354 the upper triangular matrix - put soln in nrs */ 355 if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it); 356 else nrs[it] = 0.0; 357 358 for (k = it - 1; k >= 0; k--) { 359 tt = *RS(k); 360 for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j]; 361 nrs[k] = tt / *HH(k, k); 362 } 363 364 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 365 PetscCall(VecMAXPBY(VEC_TEMP, it + 1, nrs, 0, &PREVEC(0))); 366 367 /* add solution to previous solution */ 368 if (vdest == vguess) { 369 PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP)); 370 } else { 371 PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess)); 372 } 373 PetscFunctionReturn(PETSC_SUCCESS); 374 } 375 376 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res) 377 { 378 PetscScalar *hh, *cc, *ss, *rs; 379 PetscInt j; 380 PetscReal hapbnd; 381 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data); 382 383 PetscFunctionBegin; 384 hh = HH(0, it); /* pointer to beginning of column to update */ 385 cc = CC(0); /* beginning of cosine rotations */ 386 ss = SS(0); /* beginning of sine rotations */ 387 rs = RS(0); /* right hand side of least squares system */ 388 389 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 390 for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j]; 391 392 /* check for the happy breakdown */ 393 hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol); 394 if (PetscAbsScalar(hh[it + 1]) < hapbnd) { 395 PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it)))); 396 *hapend = PETSC_TRUE; 397 } 398 399 /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */ 400 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 401 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 402 403 for (j = 0; j < it; j++) { 404 PetscScalar hhj = hh[j]; 405 hh[j] = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1]; 406 hh[j + 1] = -ss[j] * hhj + cc[j] * hh[j + 1]; 407 } 408 409 /* 410 compute the new plane rotation, and apply it to: 411 1) the right-hand-side of the Hessenberg system (RS) 412 note: it affects RS(it) and RS(it+1) 413 2) the new column of the Hessenberg matrix 414 note: it affects HH(it,it) which is currently pointed to 415 by hh and HH(it+1, it) (*(hh+1)) 416 thus obtaining the updated value of the residual... 417 */ 418 419 /* compute new plane rotation */ 420 421 if (!*hapend) { 422 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1]))); 423 if (delta == 0.0) { 424 ksp->reason = KSP_DIVERGED_NULL; 425 PetscFunctionReturn(PETSC_SUCCESS); 426 } 427 428 cc[it] = hh[it] / delta; /* new cosine value */ 429 ss[it] = hh[it + 1] / delta; /* new sine value */ 430 431 hh[it] = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1]; 432 rs[it + 1] = -ss[it] * rs[it]; 433 rs[it] = PetscConj(cc[it]) * rs[it]; 434 *res = PetscAbsScalar(rs[it + 1]); 435 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 436 another rotation matrix (so RH doesn't change). The new residual is 437 always the new sine term times the residual from last time (RS(it)), 438 but now the new sine rotation would be zero...so the residual should 439 be zero...so we will multiply "zero" by the last residual. This might 440 not be exactly what we want to do here -could just return "zero". */ 441 *res = 0.0; 442 } 443 PetscFunctionReturn(PETSC_SUCCESS); 444 } 445 446 static PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result) 447 { 448 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 449 450 PetscFunctionBegin; 451 if (!ptr) { 452 if (!pipefgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp)); 453 ptr = pipefgmres->sol_temp; 454 } 455 if (!pipefgmres->nrs) { 456 /* allocate the work area */ 457 PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs)); 458 } 459 460 PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it)); 461 if (result) *result = ptr; 462 PetscFunctionReturn(PETSC_SUCCESS); 463 } 464 465 static PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject) 466 { 467 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 468 PetscBool flg; 469 PetscScalar shift; 470 471 PetscFunctionBegin; 472 PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject)); 473 PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options"); 474 PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg)); 475 if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift)); 476 PetscOptionsHeadEnd(); 477 PetscFunctionReturn(PETSC_SUCCESS); 478 } 479 480 static PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer) 481 { 482 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 483 PetscBool iascii, isstring; 484 485 PetscFunctionBegin; 486 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 487 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring)); 488 489 if (iascii) { 490 PetscCall(PetscViewerASCIIPrintf(viewer, " restart=%" PetscInt_FMT "\n", pipefgmres->max_k)); 491 PetscCall(PetscViewerASCIIPrintf(viewer, " happy breakdown tolerance %g\n", (double)pipefgmres->haptol)); 492 #if defined(PETSC_USE_COMPLEX) 493 PetscCall(PetscViewerASCIIPrintf(viewer, " shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift))); 494 #else 495 PetscCall(PetscViewerASCIIPrintf(viewer, " shift=%g\n", (double)pipefgmres->shift)); 496 #endif 497 } else if (isstring) { 498 PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k)); 499 #if defined(PETSC_USE_COMPLEX) 500 PetscCall(PetscViewerStringSPrintf(viewer, " shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift))); 501 #else 502 PetscCall(PetscViewerStringSPrintf(viewer, " shift=%g\n", (double)pipefgmres->shift)); 503 #endif 504 } 505 PetscFunctionReturn(PETSC_SUCCESS); 506 } 507 508 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) 509 { 510 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 511 PetscInt i; 512 513 PetscFunctionBegin; 514 PetscCall(PetscFree(pipefgmres->prevecs)); 515 PetscCall(PetscFree(pipefgmres->zvecs)); 516 for (i = 0; i < pipefgmres->nwork_alloc; i++) { 517 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i])); 518 PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i])); 519 } 520 PetscCall(PetscFree(pipefgmres->prevecs_user_work)); 521 PetscCall(PetscFree(pipefgmres->zvecs_user_work)); 522 PetscCall(PetscFree(pipefgmres->redux)); 523 PetscCall(KSPReset_GMRES(ksp)); 524 PetscFunctionReturn(PETSC_SUCCESS); 525 } 526 527 /*MC 528 KSPPIPEFGMRES - Implements the Pipelined (1-stage) Flexible Generalized Minimal Residual method. [](sec_pipelineksp). [](sec_flexibleksp) 529 530 Options Database Keys: 531 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 532 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 533 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of vectors are allocated as needed) 534 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift() 535 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 536 537 Level: intermediate 538 539 Notes: 540 This variant is not "explicitly normalized" like `KSPPGMRES`, and requires a shift parameter. 541 542 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 543 544 Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with `KSPFGMRES`). 545 546 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 547 See [](doc_faq_pipelined) 548 549 Developer Note: 550 This class is subclassed off of `KSPGMRES`, see the source code in src/ksp/ksp/impls/gmres for comments on the structure of the code 551 552 Contributed by: 553 P. Sanan and S.M. Schnepp 554 555 Reference: 556 . * - P. Sanan, S.M. Schnepp, and D.A. May, "Pipelined, Flexible Krylov Subspace Methods," 557 SIAM Journal on Scientific Computing 2016 38:5, C441-C470, DOI: 10.1137/15M1049130 558 559 .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), [](sec_flexibleksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES` 560 `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()` 561 M*/ 562 563 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) 564 { 565 KSP_PIPEFGMRES *pipefgmres; 566 567 PetscFunctionBegin; 568 PetscCall(PetscNew(&pipefgmres)); 569 570 ksp->data = (void *)pipefgmres; 571 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 572 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 573 ksp->ops->solve = KSPSolve_PIPEFGMRES; 574 ksp->ops->reset = KSPReset_PIPEFGMRES; 575 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 576 ksp->ops->view = KSPView_PIPEFGMRES; 577 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 578 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 579 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 580 581 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3)); 582 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1)); 583 584 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES)); 585 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES)); 586 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES)); 587 588 pipefgmres->nextra_vecs = 1; 589 pipefgmres->haptol = 1.0e-30; 590 pipefgmres->q_preallocate = 0; 591 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 592 pipefgmres->orthog = NULL; 593 pipefgmres->nrs = NULL; 594 pipefgmres->sol_temp = NULL; 595 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 596 pipefgmres->Rsvd = NULL; 597 pipefgmres->orthogwork = NULL; 598 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 599 pipefgmres->shift = 1.0; 600 PetscFunctionReturn(PETSC_SUCCESS); 601 } 602 603 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it) 604 { 605 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 606 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 607 PetscInt nalloc; /* number to allocate */ 608 PetscInt k; 609 610 PetscFunctionBegin; 611 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 612 in a single chunk */ 613 614 /* Adjust the number to allocate to make sure that we don't exceed the 615 number of available slots (pipefgmres->vecs_allocated)*/ 616 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; 617 if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS); 618 619 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 620 621 /* work vectors */ 622 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL)); 623 for (k = 0; k < nalloc; k++) pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k]; 624 /* specify size of chunk allocated */ 625 pipefgmres->mwork_alloc[nwork] = nalloc; 626 627 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 628 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL)); 629 for (k = 0; k < nalloc; k++) pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k]; 630 631 PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL)); 632 for (k = 0; k < nalloc; k++) pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k]; 633 634 /* increment the number of work vector chunks */ 635 pipefgmres->nwork_alloc++; 636 PetscFunctionReturn(PETSC_SUCCESS); 637 } 638 639 /*@ 640 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined `KSPPIPEFGMRES` solver. 641 642 Logically Collective 643 644 Input Parameters: 645 + ksp - the Krylov space context 646 - shift - the shift 647 648 Options Database Key: 649 . -ksp_pipefgmres_shift <shift> - set the shift parameter 650 651 Level: intermediate 652 653 Note: 654 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. 655 This can be achieved with PETSc itself by using a few iterations of a Krylov method. 656 See `KSPComputeEigenvalues()` (and note the caveats there). 657 658 .seealso: [](ch_ksp), `KSPPIPEFGMRES`, `KSPComputeEigenvalues()` 659 @*/ 660 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift) 661 { 662 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data; 663 664 PetscFunctionBegin; 665 PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1); 666 PetscValidLogicalCollectiveScalar(ksp, shift, 2); 667 pipefgmres->shift = shift; 668 PetscFunctionReturn(PETSC_SUCCESS); 669 } 670