1 2 /* 3 This file implements PGMRES (a Pipelined Generalized Minimal Residual method) 4 */ 5 6 #include <../src/ksp/ksp/impls/gmres/pgmres/pgmresimpl.h> /*I "petscksp.h" I*/ 7 8 static PetscErrorCode KSPPGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *); 9 static PetscErrorCode KSPPGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt); 10 11 static PetscErrorCode KSPSetUp_PGMRES(KSP ksp) 12 { 13 PetscFunctionBegin; 14 PetscCall(KSPSetUp_GMRES(ksp)); 15 PetscFunctionReturn(PETSC_SUCCESS); 16 } 17 18 static PetscErrorCode KSPPGMRESCycle(PetscInt *itcount, KSP ksp) 19 { 20 KSP_PGMRES *pgmres = (KSP_PGMRES *)(ksp->data); 21 PetscReal res_norm, res, newnorm; 22 PetscInt it = 0, j, k; 23 PetscBool hapend = PETSC_FALSE; 24 25 PetscFunctionBegin; 26 if (itcount) *itcount = 0; 27 PetscCall(VecNormalize(VEC_VV(0), &res_norm)); 28 KSPCheckNorm(ksp, res_norm); 29 res = res_norm; 30 *RS(0) = res_norm; 31 32 /* check for the convergence */ 33 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 34 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res; 35 else ksp->rnorm = 0; 36 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 37 pgmres->it = it - 2; 38 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 39 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); 40 if (!res) { 41 ksp->reason = KSP_CONVERGED_ATOL; 42 PetscCall(PetscInfo(ksp, "Converged due to zero residual norm on entry\n")); 43 PetscFunctionReturn(PETSC_SUCCESS); 44 } 45 46 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 47 for (; !ksp->reason; it++) { 48 Vec Zcur, Znext; 49 if (pgmres->vv_allocated <= it + VEC_OFFSET + 1) PetscCall(KSPGMRESGetNewVectors(ksp, it + 1)); 50 /* VEC_VV(it-1) is orthogonal, it will be normalized once the VecNorm arrives. */ 51 Zcur = VEC_VV(it); /* Zcur is not yet orthogonal, but the VecMDot to orthogonalize it has been started. */ 52 Znext = VEC_VV(it + 1); /* This iteration will compute Znext, update with a deferred correction once we know how 53 Zcur relates to the previous vectors, and start the reduction to orthogonalize it. */ 54 55 if (it < pgmres->max_k + 1 && ksp->its + 1 < PetscMax(2, ksp->max_it)) { /* We don't know whether what we have computed is enough, so apply the matrix. */ 56 PetscCall(KSP_PCApplyBAorAB(ksp, Zcur, Znext, VEC_TEMP_MATOP)); 57 } 58 59 if (it > 1) { /* Complete the pending reduction */ 60 PetscCall(VecNormEnd(VEC_VV(it - 1), NORM_2, &newnorm)); 61 *HH(it - 1, it - 2) = newnorm; 62 } 63 if (it > 0) { /* Finish the reduction computing the latest column of H */ 64 PetscCall(VecMDotEnd(Zcur, it, &(VEC_VV(0)), HH(0, it - 1))); 65 } 66 67 if (it > 1) { 68 /* normalize the base vector from two iterations ago, basis is complete up to here */ 69 PetscCall(VecScale(VEC_VV(it - 1), 1. / *HH(it - 1, it - 2))); 70 71 PetscCall(KSPPGMRESUpdateHessenberg(ksp, it - 2, &hapend, &res)); 72 pgmres->it = it - 2; 73 ksp->its++; 74 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res; 75 else ksp->rnorm = 0; 76 77 PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP)); 78 if (it < pgmres->max_k + 1 || ksp->reason || ksp->its == ksp->max_it) { /* Monitor if we are done or still iterating, but not before a restart. */ 79 PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm)); 80 PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); 81 } 82 if (ksp->reason) break; 83 /* Catch error in happy breakdown and signal convergence and break from loop */ 84 if (hapend) { 85 PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "Reached happy break down, but convergence was not indicated. Residual norm = %g", (double)res); 86 ksp->reason = KSP_DIVERGED_BREAKDOWN; 87 break; 88 } 89 90 if (!(it < pgmres->max_k + 1 && ksp->its < ksp->max_it)) break; 91 92 /* The it-2 column of H was not scaled when we computed Zcur, apply correction */ 93 PetscCall(VecScale(Zcur, 1. / *HH(it - 1, it - 2))); 94 /* And Znext computed in this iteration was computed using the under-scaled Zcur */ 95 PetscCall(VecScale(Znext, 1. / *HH(it - 1, it - 2))); 96 97 /* In the previous iteration, we projected an unnormalized Zcur against the Krylov basis, so we need to fix the column of H resulting from that projection. */ 98 for (k = 0; k < it; k++) *HH(k, it - 1) /= *HH(it - 1, it - 2); 99 /* When Zcur was projected against the Krylov basis, VV(it-1) was still not normalized, so fix that too. This 100 * column is complete except for HH(it,it-1) which we won't know until the next iteration. */ 101 *HH(it - 1, it - 1) /= *HH(it - 1, it - 2); 102 } 103 104 if (it > 0) { 105 PetscScalar *work; 106 if (!pgmres->orthogwork) PetscCall(PetscMalloc1(pgmres->max_k + 2, &pgmres->orthogwork)); 107 work = pgmres->orthogwork; 108 /* Apply correction computed by the VecMDot in the last iteration to Znext. The original form is 109 * 110 * Znext -= sum_{j=0}^{i-1} Z[j+1] * H[j,i-1] 111 * 112 * where 113 * 114 * Z[j] = sum_{k=0}^j V[k] * H[k,j-1] 115 * 116 * substituting 117 * 118 * Znext -= sum_{j=0}^{i-1} sum_{k=0}^{j+1} V[k] * H[k,j] * H[j,i-1] 119 * 120 * rearranging the iteration space from row-column to column-row 121 * 122 * Znext -= sum_{k=0}^i sum_{j=k-1}^{i-1} V[k] * H[k,j] * H[j,i-1] 123 * 124 * Note that column it-1 of HH is correct. For all previous columns, we must look at HES because HH has already 125 * been transformed to upper triangular form. 126 */ 127 for (k = 0; k < it + 1; k++) { 128 work[k] = 0; 129 for (j = PetscMax(0, k - 1); j < it - 1; j++) work[k] -= *HES(k, j) * *HH(j, it - 1); 130 } 131 PetscCall(VecMAXPY(Znext, it + 1, work, &VEC_VV(0))); 132 PetscCall(VecAXPY(Znext, -*HH(it - 1, it - 1), Zcur)); 133 134 /* Orthogonalize Zcur against existing basis vectors. */ 135 for (k = 0; k < it; k++) work[k] = -*HH(k, it - 1); 136 PetscCall(VecMAXPY(Zcur, it, work, &VEC_VV(0))); 137 /* Zcur is now orthogonal, and will be referred to as VEC_VV(it) again, though it is still not normalized. */ 138 /* Begin computing the norm of the new vector, will be normalized after the MatMult in the next iteration. */ 139 PetscCall(VecNormBegin(VEC_VV(it), NORM_2, &newnorm)); 140 } 141 142 /* Compute column of H (to the diagonal, but not the subdiagonal) to be able to orthogonalize the newest vector. */ 143 PetscCall(VecMDotBegin(Znext, it + 1, &VEC_VV(0), HH(0, it))); 144 145 /* Start an asynchronous split-mode reduction, the result of the MDot and Norm will be collected on the next iteration. */ 146 PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Znext))); 147 } 148 if (itcount) *itcount = it - 1; /* Number of iterations actually completed. */ 149 150 /* 151 Solve for the "best" coefficients of the Krylov 152 columns, add the solution values together, and possibly unwind the preconditioning from the solution 153 */ 154 /* Form the solution (or the solution so far) */ 155 PetscCall(KSPPGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, it - 2)); 156 PetscFunctionReturn(PETSC_SUCCESS); 157 } 158 159 static PetscErrorCode KSPSolve_PGMRES(KSP ksp) 160 { 161 PetscInt its, itcount; 162 KSP_PGMRES *pgmres = (KSP_PGMRES *)ksp->data; 163 PetscBool guess_zero = ksp->guess_zero; 164 165 PetscFunctionBegin; 166 PetscCheck(!ksp->calc_sings || pgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 167 PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp)); 168 ksp->its = 0; 169 PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp)); 170 171 itcount = 0; 172 ksp->reason = KSP_CONVERGED_ITERATING; 173 while (!ksp->reason) { 174 PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs)); 175 PetscCall(KSPPGMRESCycle(&its, ksp)); 176 itcount += its; 177 if (itcount >= ksp->max_it) { 178 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 179 break; 180 } 181 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 182 } 183 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 static PetscErrorCode KSPDestroy_PGMRES(KSP ksp) 188 { 189 PetscFunctionBegin; 190 PetscCall(KSPDestroy_GMRES(ksp)); 191 PetscFunctionReturn(PETSC_SUCCESS); 192 } 193 194 static PetscErrorCode KSPPGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it) 195 { 196 PetscScalar tt; 197 PetscInt k, j; 198 KSP_PGMRES *pgmres = (KSP_PGMRES *)(ksp->data); 199 200 PetscFunctionBegin; 201 /* Solve for solution vector that minimizes the residual */ 202 203 if (it < 0) { /* no pgmres steps have been performed */ 204 PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */ 205 PetscFunctionReturn(PETSC_SUCCESS); 206 } 207 208 /* solve the upper triangular system - RS is the right side and HH is 209 the upper triangular matrix - put soln in nrs */ 210 if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it); 211 else nrs[it] = 0.0; 212 213 for (k = it - 1; k >= 0; k--) { 214 tt = *RS(k); 215 for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j]; 216 nrs[k] = tt / *HH(k, k); 217 } 218 219 /* Accumulate the correction to the solution of the preconditioned problem in TEMP */ 220 PetscCall(VecMAXPBY(VEC_TEMP, it + 1, nrs, 0, &VEC_VV(0))); 221 PetscCall(KSPUnwindPreconditioner(ksp, VEC_TEMP, VEC_TEMP_MATOP)); 222 /* add solution to previous solution */ 223 if (vdest == vguess) { 224 PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP)); 225 } else { 226 PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess)); 227 } 228 PetscFunctionReturn(PETSC_SUCCESS); 229 } 230 231 static PetscErrorCode KSPPGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res) 232 { 233 PetscScalar *hh, *cc, *ss, *rs; 234 PetscInt j; 235 PetscReal hapbnd; 236 KSP_PGMRES *pgmres = (KSP_PGMRES *)(ksp->data); 237 238 PetscFunctionBegin; 239 hh = HH(0, it); /* pointer to beginning of column to update */ 240 cc = CC(0); /* beginning of cosine rotations */ 241 ss = SS(0); /* beginning of sine rotations */ 242 rs = RS(0); /* right hand side of least squares system */ 243 244 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 245 for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j]; 246 247 /* check for the happy breakdown */ 248 hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pgmres->haptol); 249 if (PetscAbsScalar(hh[it + 1]) < hapbnd) { 250 PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it)))); 251 *hapend = PETSC_TRUE; 252 } 253 254 /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */ 255 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 256 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 257 258 for (j = 0; j < it; j++) { 259 PetscScalar hhj = hh[j]; 260 hh[j] = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1]; 261 hh[j + 1] = -ss[j] * hhj + cc[j] * hh[j + 1]; 262 } 263 264 /* 265 compute the new plane rotation, and apply it to: 266 1) the right-hand-side of the Hessenberg system (RS) 267 note: it affects RS(it) and RS(it+1) 268 2) the new column of the Hessenberg matrix 269 note: it affects HH(it,it) which is currently pointed to 270 by hh and HH(it+1, it) (*(hh+1)) 271 thus obtaining the updated value of the residual... 272 */ 273 274 /* compute new plane rotation */ 275 276 if (!*hapend) { 277 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1]))); 278 if (delta == 0.0) { 279 ksp->reason = KSP_DIVERGED_NULL; 280 PetscFunctionReturn(PETSC_SUCCESS); 281 } 282 283 cc[it] = hh[it] / delta; /* new cosine value */ 284 ss[it] = hh[it + 1] / delta; /* new sine value */ 285 286 hh[it] = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1]; 287 rs[it + 1] = -ss[it] * rs[it]; 288 rs[it] = PetscConj(cc[it]) * rs[it]; 289 *res = PetscAbsScalar(rs[it + 1]); 290 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 291 another rotation matrix (so RH doesn't change). The new residual is 292 always the new sine term times the residual from last time (RS(it)), 293 but now the new sine rotation would be zero...so the residual should 294 be zero...so we will multiply "zero" by the last residual. This might 295 not be exactly what we want to do here -could just return "zero". */ 296 *res = 0.0; 297 } 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 static PetscErrorCode KSPBuildSolution_PGMRES(KSP ksp, Vec ptr, Vec *result) 302 { 303 KSP_PGMRES *pgmres = (KSP_PGMRES *)ksp->data; 304 305 PetscFunctionBegin; 306 if (!ptr) { 307 if (!pgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &pgmres->sol_temp)); 308 ptr = pgmres->sol_temp; 309 } 310 if (!pgmres->nrs) { 311 /* allocate the work area */ 312 PetscCall(PetscMalloc1(pgmres->max_k, &pgmres->nrs)); 313 } 314 315 PetscCall(KSPPGMRESBuildSoln(pgmres->nrs, ksp->vec_sol, ptr, ksp, pgmres->it)); 316 if (result) *result = ptr; 317 PetscFunctionReturn(PETSC_SUCCESS); 318 } 319 320 static PetscErrorCode KSPSetFromOptions_PGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject) 321 { 322 PetscFunctionBegin; 323 PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject)); 324 PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined GMRES Options"); 325 PetscOptionsHeadEnd(); 326 PetscFunctionReturn(PETSC_SUCCESS); 327 } 328 329 static PetscErrorCode KSPReset_PGMRES(KSP ksp) 330 { 331 PetscFunctionBegin; 332 PetscCall(KSPReset_GMRES(ksp)); 333 PetscFunctionReturn(PETSC_SUCCESS); 334 } 335 336 /*MC 337 KSPPGMRES - Implements the Pipelined Generalized Minimal Residual method. [](sec_pipelineksp) 338 339 Options Database Keys: 340 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 341 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 342 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of vectors are allocated as needed) 343 . -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default) 344 . -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower) 345 . -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the 346 stability of the classical Gram-Schmidt orthogonalization. 347 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 348 349 Level: beginner 350 351 Note: 352 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 353 See [](doc_faq_pipelined) 354 355 References: 356 . * - Ghysels, Ashby, Meerbergen, Vanroose, Hiding global communication latencies in the GMRES algorithm on massively parallel machines, 2012. 357 358 Developer Note: 359 This object is subclassed off of `KSPGMRES`, see the source code in src/ksp/ksp/impls/gmres for comments on the structure of the code 360 361 .seealso: [](ch_ksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPGMRES`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, 362 `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESSetOrthogonalization()`, `KSPGMRESGetOrthogonalization()`, 363 `KSPGMRESClassicalGramSchmidtOrthogonalization()`, `KSPGMRESModifiedGramSchmidtOrthogonalization()`, 364 `KSPGMRESCGSRefinementType`, `KSPGMRESSetCGSRefinementType()`, `KSPGMRESGetCGSRefinementType()`, `KSPGMRESMonitorKrylov()` 365 M*/ 366 367 PETSC_EXTERN PetscErrorCode KSPCreate_PGMRES(KSP ksp) 368 { 369 KSP_PGMRES *pgmres; 370 371 PetscFunctionBegin; 372 PetscCall(PetscNew(&pgmres)); 373 374 ksp->data = (void *)pgmres; 375 ksp->ops->buildsolution = KSPBuildSolution_PGMRES; 376 ksp->ops->setup = KSPSetUp_PGMRES; 377 ksp->ops->solve = KSPSolve_PGMRES; 378 ksp->ops->reset = KSPReset_PGMRES; 379 ksp->ops->destroy = KSPDestroy_PGMRES; 380 ksp->ops->view = KSPView_GMRES; 381 ksp->ops->setfromoptions = KSPSetFromOptions_PGMRES; 382 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 383 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 384 385 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3)); 386 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2)); 387 PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1)); 388 389 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES)); 390 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetOrthogonalization_C", KSPGMRESSetOrthogonalization_GMRES)); 391 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetOrthogonalization_C", KSPGMRESGetOrthogonalization_GMRES)); 392 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES)); 393 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES)); 394 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetCGSRefinementType_C", KSPGMRESSetCGSRefinementType_GMRES)); 395 PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetCGSRefinementType_C", KSPGMRESGetCGSRefinementType_GMRES)); 396 397 pgmres->nextra_vecs = 1; 398 pgmres->haptol = 1.0e-30; 399 pgmres->q_preallocate = 0; 400 pgmres->delta_allocate = PGMRES_DELTA_DIRECTIONS; 401 pgmres->orthog = KSPGMRESClassicalGramSchmidtOrthogonalization; 402 pgmres->nrs = NULL; 403 pgmres->sol_temp = NULL; 404 pgmres->max_k = PGMRES_DEFAULT_MAXK; 405 pgmres->Rsvd = NULL; 406 pgmres->orthogwork = NULL; 407 pgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 408 PetscFunctionReturn(PETSC_SUCCESS); 409 } 410