1 /* 2 Contributed by Patrick Sanan and Sascha M. Schnepp 3 */ 4 5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 6 7 static PetscBool cited = PETSC_FALSE; 8 static const char citation[] = 9 "@article{SSM2016,\n" 10 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n" 11 " title = {Pipelined, Flexible Krylov Subspace Methods},\n" 12 " journal = {SIAM Journal on Scientific Computing},\n" 13 " volume = {38},\n" 14 " number = {5},\n" 15 " pages = {C441-C470},\n" 16 " year = {2016},\n" 17 " doi = {10.1137/15M1049130},\n" 18 " URL = {http://dx.doi.org/10.1137/15M1049130},\n" 19 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n" 20 "}\n"; 21 22 #define PIPEFGMRES_DELTA_DIRECTIONS 10 23 #define PIPEFGMRES_DEFAULT_MAXK 30 24 25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt); 26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*); 27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt); 28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 29 30 /* 31 32 KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres. 33 34 This is called once, usually automatically by KSPSolve() or KSPSetUp(), 35 but can be called directly by KSPSetUp(). 36 37 */ 38 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) 39 { 40 PetscErrorCode ierr; 41 PetscInt k; 42 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 43 const PetscInt max_k = pipefgmres->max_k; 44 45 PetscFunctionBegin; 46 ierr = KSPSetUp_GMRES(ksp);CHKERRQ(ierr); 47 48 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);CHKERRQ(ierr); 49 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);CHKERRQ(ierr); 50 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr); 51 52 ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);CHKERRQ(ierr); 53 ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);CHKERRQ(ierr); 54 for (k=0; k < pipefgmres->vv_allocated; k++) { 55 pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; 56 } 57 58 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);CHKERRQ(ierr); 59 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);CHKERRQ(ierr); 60 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr); 61 62 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);CHKERRQ(ierr); 63 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));CHKERRQ(ierr); 64 65 ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);CHKERRQ(ierr); 66 ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);CHKERRQ(ierr); 67 for (k=0; k < pipefgmres->vv_allocated; k++) { 68 pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; 69 } 70 71 PetscFunctionReturn(0); 72 } 73 74 /* 75 76 KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart. Return residual 77 history if requested. 78 79 input parameters: 80 . pipefgmres - structure containing parameters and work areas 81 82 output parameters: 83 . itcount - number of iterations used. If null, ignored. 84 . converged - 0 if not converged 85 86 Notes: 87 On entry, the value in vector VEC_VV(0) should be 88 the initial residual. 89 90 91 */ 92 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp) 93 { 94 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 95 PetscReal res_norm; 96 PetscReal hapbnd,tt; 97 PetscScalar *hh,*hes,*lhh,shift = pipefgmres->shift; 98 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 99 PetscErrorCode ierr; 100 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 101 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 102 PetscInt i,j,k; 103 Mat Amat,Pmat; 104 Vec Q,W; /* Pipelining vectors */ 105 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 106 107 PetscFunctionBegin; 108 if (itcount) *itcount = 0; 109 110 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 111 Q = VEC_Q; 112 W = VEC_W; 113 114 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 115 /* Note that we add an extra value here to allow for a single reduction */ 116 if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr); 117 } 118 lhh = pipefgmres->orthogwork; 119 120 /* Number of pseudo iterations since last restart is the number 121 of prestart directions */ 122 loc_it = 0; 123 124 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 125 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 126 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 127 (loc_it -1) is passed, so the two are equivalent */ 128 pipefgmres->it = (loc_it - 1); 129 130 /* initial residual is in VEC_VV(0) - compute its norm*/ 131 ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr); 132 133 /* first entry in right-hand-side of hessenberg system is just 134 the initial residual norm */ 135 *RS(0) = res_norm; 136 137 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 138 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 139 else ksp->rnorm = 0; 140 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 141 ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr); 142 ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr); 143 144 /* check for the convergence - maybe the current guess is good enough */ 145 ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 146 if (ksp->reason) { 147 if (itcount) *itcount = 0; 148 PetscFunctionReturn(0); 149 } 150 151 /* scale VEC_VV (the initial residual) */ 152 ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr); 153 154 /* Fill the pipeline */ 155 ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr); 156 ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); 157 ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr); 158 ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */ 159 160 /* MAIN ITERATION LOOP BEGINNING*/ 161 /* keep iterating until we have converged OR generated the max number 162 of directions OR reached the max number of iterations for the method */ 163 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 164 if (loc_it) { 165 ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); 166 ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); 167 } 168 pipefgmres->it = (loc_it - 1); 169 170 /* see if more space is needed for work vectors */ 171 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 172 ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr); 173 /* (loc_it+1) is passed in as number of the first vector that should 174 be allocated */ 175 } 176 177 /* Note that these inner products are with "Z" now, so 178 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 179 not the value from the equivalent FGMRES run (even in exact arithmetic) 180 That is, the H we need for the Arnoldi relation is different from the 181 coefficients we use in the orthogonalization process,because of the shift */ 182 183 /* Do some local twiddling to allow for a single reduction */ 184 for (i=0;i<loc_it+1;i++){ 185 redux[i] = VEC_VV(i); 186 } 187 redux[loc_it+1] = ZVEC(loc_it); 188 189 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 190 ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); 191 192 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 193 ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr); 194 195 /* The work to be overlapped with the inner products follows. 196 This is application of the preconditioner and the operator 197 to compute intermediate quantites which will be combined (locally) 198 with the results of the inner products. 199 */ 200 ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr); 201 ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); 202 ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr); 203 204 /* Compute inner products of the new direction with previous directions, 205 and the norm of the to-be-orthogonalized direction "Z". 206 This information is enough to build the required entries 207 of H. The inner product with VEC_VV(it_loc) is 208 *different* than in the standard FGMRES and need to be dealt with specially. 209 That is, for standard FGMRES the orthogonalization coefficients are the same 210 as the coefficients used in the Arnoldi relation to reconstruct, but here this 211 is not true (albeit only for the one entry of H which we "unshift" below. */ 212 213 /* Finish the dot product, retrieving the extra entry */ 214 ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); 215 tt = PetscRealPart(lhh[loc_it+1]); 216 217 /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt 218 Note that the Hessenberg entries require a shift, as these are for the 219 relation AU = VH, which is wrt unshifted basis vectors */ 220 hh = HH(0,loc_it); hes=HES(0,loc_it); 221 for (j=0; j<loc_it; j++) { 222 hh[j] = lhh[j]; 223 hes[j] = lhh[j]; 224 } 225 hh[loc_it] = lhh[loc_it] + shift; 226 hes[loc_it] = lhh[loc_it] + shift; 227 228 /* we delay applying the shift here */ 229 for (j=0; j<=loc_it; j++) { 230 lhh[j] = -lhh[j]; /* flip sign */ 231 } 232 233 /* Compute the norm of the un-normalized new direction using the rearranged formula 234 Note that these are shifted ("barred") quantities */ 235 for (k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 236 /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */ 237 if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0 ; 238 if (tt < 0.0) { 239 /* If we detect square root breakdown in the norm, we must restart the algorithm. 240 Here this means we simply break the current loop and reconstruct the solution 241 using the basis we have computed thus far. Note that by breaking immediately, 242 we do not update the iteration count, so computation done in this iteration 243 should be disregarded. 244 */ 245 ierr = PetscInfo2(ksp,"Restart due to square root breakdown at it = %D, tt=%g\n",ksp->its,(double)tt);CHKERRQ(ierr); 246 break; 247 } else { 248 tt = PetscSqrtReal(tt); 249 } 250 251 /* new entry in hessenburg is the 2-norm of our new direction */ 252 hh[loc_it+1] = tt; 253 hes[loc_it+1] = tt; 254 255 /* The recurred computation for the new direction 256 The division by tt is delayed to the happy breakdown check later 257 Note placement BEFORE the unshift 258 */ 259 ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr); 260 ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr); 261 /* (VEC_VV(loc_it+1) is not normalized yet) */ 262 263 /* The recurred computation for the preconditioned vector (u) */ 264 ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr); 265 ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr); 266 ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); 267 268 /* Unshift an entry in the GS coefficients ("removing the bar") */ 269 lhh[loc_it] -= shift; 270 271 /* The recurred computation for z (Au) 272 Note placement AFTER the "unshift" */ 273 ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr); 274 ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr); 275 ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); 276 277 /* Happy Breakdown Check */ 278 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 279 /* RS(loc_it) contains the res_norm from the last iteration */ 280 hapbnd = PetscMin(pipefgmres->haptol,hapbnd); 281 if (tt > hapbnd) { 282 /* scale new direction by its norm */ 283 ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr); 284 } else { 285 /* This happens when the solution is exactly reached. */ 286 /* So there is no new direction... */ 287 ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); /* set VEC_TEMP to 0 */ 288 hapend = PETSC_TRUE; 289 } 290 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 291 current solution would not be exact if HES was singular. Note that 292 HH non-singular implies that HES is not singular, and HES is guaranteed 293 to be nonsingular when PREVECS are linearly independent and A is 294 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 295 of HES). So we should really add a check to verify that HES is nonsingular.*/ 296 297 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 298 here to check that the hessenberg matrix is indeed non-singular (since 299 FGMRES does not guarantee this) */ 300 301 /* Now apply rotations to new col of hessenberg (and right side of system), 302 calculate new rotation, and get new residual norm at the same time*/ 303 ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr); 304 if (ksp->reason) break; 305 306 loc_it++; 307 pipefgmres->it = (loc_it-1); /* Add this here in case it has converged */ 308 309 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 310 ksp->its++; 311 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 312 else ksp->rnorm = 0; 313 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 314 315 ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 316 317 /* Catch error in happy breakdown and signal convergence and break from loop */ 318 if (hapend) { 319 if (!ksp->reason) { 320 if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm); 321 else { 322 ksp->reason = KSP_DIVERGED_BREAKDOWN; 323 break; 324 } 325 } 326 } 327 } 328 /* END OF ITERATION LOOP */ 329 ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr); 330 331 /* 332 Monitor if we know that we will not return for a restart */ 333 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { 334 ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr); 335 } 336 337 if (itcount) *itcount = loc_it; 338 339 /* 340 Down here we have to solve for the "best" coefficients of the Krylov 341 columns, add the solution values together, and possibly unwind the 342 preconditioning from the solution 343 */ 344 345 /* Form the solution (or the solution so far) */ 346 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln 347 properly navigates */ 348 349 ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr); 350 351 PetscFunctionReturn(0); 352 } 353 354 /* 355 KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method. 356 357 358 Input Parameter: 359 . ksp - the Krylov space object that was set to use pipefgmres 360 361 Output Parameter: 362 . outits - number of iterations used 363 364 */ 365 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) 366 { 367 PetscErrorCode ierr; 368 PetscInt its,itcount; 369 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 370 PetscBool guess_zero = ksp->guess_zero; 371 372 PetscFunctionBegin; 373 /* We have not checked these routines for use with complex numbers. The inner products 374 are likely not defined correctly for that case */ 375 if (PetscDefined(PETSC_USE_COMPLEX) && !PetscDefined(PETSC_SKIP_COMPLEX)) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars"); 376 377 ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr); 378 379 if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 380 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 381 ksp->its = 0; 382 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 383 384 itcount = 0; 385 ksp->reason = KSP_CONVERGED_ITERATING; 386 while (!ksp->reason) { 387 ierr = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr); 388 ierr = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr); 389 itcount += its; 390 if (itcount >= ksp->max_it) { 391 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 392 break; 393 } 394 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 395 } 396 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 397 PetscFunctionReturn(0); 398 } 399 400 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) 401 { 402 PetscErrorCode ierr; 403 404 PetscFunctionBegin; 405 ierr = KSPReset_PIPEFGMRES(ksp);CHKERRQ(ierr); 406 ierr = KSPDestroy_GMRES(ksp);CHKERRQ(ierr); 407 PetscFunctionReturn(0); 408 } 409 410 /* 411 KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the 412 current iterates. 413 414 Input parameters: 415 nrs - work area of size it + 1. 416 vguess - index of initial guess 417 vdest - index of result. Note that vguess may == vdest (replace 418 guess with the solution). 419 it - HH upper triangular part is a block of size (it+1) x (it+1) 420 421 This is an internal routine that knows about the PIPEFGMRES internals. 422 */ 423 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it) 424 { 425 PetscScalar tt; 426 PetscErrorCode ierr; 427 PetscInt k,j; 428 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 429 430 PetscFunctionBegin; 431 /* Solve for solution vector that minimizes the residual */ 432 433 if (it < 0) { /* no pipefgmres steps have been performed */ 434 ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */ 435 PetscFunctionReturn(0); 436 } 437 438 /* solve the upper triangular system - RS is the right side and HH is 439 the upper triangular matrix - put soln in nrs */ 440 if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it); 441 else nrs[it] = 0.0; 442 443 for (k=it-1; k>=0; k--) { 444 tt = *RS(k); 445 for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j]; 446 nrs[k] = tt / *HH(k,k); 447 } 448 449 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 450 ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr); 451 ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr); 452 453 /* add solution to previous solution */ 454 if (vdest == vguess) { 455 ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr); 456 } else { 457 ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr); 458 } 459 PetscFunctionReturn(0); 460 } 461 462 /* 463 464 KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization. 465 Return new residual. 466 467 input parameters: 468 469 . ksp - Krylov space object 470 . it - plane rotations are applied to the (it+1)th column of the 471 modified hessenberg (i.e. HH(:,it)) 472 . hapend - PETSC_FALSE not happy breakdown ending. 473 474 output parameters: 475 . res - the new residual 476 477 */ 478 /* 479 . it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this 480 */ 481 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res) 482 { 483 PetscScalar *hh,*cc,*ss,*rs; 484 PetscInt j; 485 PetscReal hapbnd; 486 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 487 PetscErrorCode ierr; 488 489 PetscFunctionBegin; 490 hh = HH(0,it); /* pointer to beginning of column to update */ 491 cc = CC(0); /* beginning of cosine rotations */ 492 ss = SS(0); /* beginning of sine rotations */ 493 rs = RS(0); /* right hand side of least squares system */ 494 495 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 496 for (j=0; j<=it+1; j++) *HES(j,it) = hh[j]; 497 498 /* check for the happy breakdown */ 499 hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol); 500 if (PetscAbsScalar(hh[it+1]) < hapbnd) { 501 ierr = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr); 502 *hapend = PETSC_TRUE; 503 } 504 505 /* Apply all the previously computed plane rotations to the new column 506 of the Hessenberg matrix */ 507 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 508 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 509 510 for (j=0; j<it; j++) { 511 PetscScalar hhj = hh[j]; 512 hh[j] = PetscConj(cc[j])*hhj + ss[j]*hh[j+1]; 513 hh[j+1] = -ss[j] *hhj + cc[j]*hh[j+1]; 514 } 515 516 /* 517 compute the new plane rotation, and apply it to: 518 1) the right-hand-side of the Hessenberg system (RS) 519 note: it affects RS(it) and RS(it+1) 520 2) the new column of the Hessenberg matrix 521 note: it affects HH(it,it) which is currently pointed to 522 by hh and HH(it+1, it) (*(hh+1)) 523 thus obtaining the updated value of the residual... 524 */ 525 526 /* compute new plane rotation */ 527 528 if (!*hapend) { 529 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1]))); 530 if (delta == 0.0) { 531 ksp->reason = KSP_DIVERGED_NULL; 532 PetscFunctionReturn(0); 533 } 534 535 cc[it] = hh[it] / delta; /* new cosine value */ 536 ss[it] = hh[it+1] / delta; /* new sine value */ 537 538 hh[it] = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1]; 539 rs[it+1] = -ss[it]*rs[it]; 540 rs[it] = PetscConj(cc[it])*rs[it]; 541 *res = PetscAbsScalar(rs[it+1]); 542 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 543 another rotation matrix (so RH doesn't change). The new residual is 544 always the new sine term times the residual from last time (RS(it)), 545 but now the new sine rotation would be zero...so the residual should 546 be zero...so we will multiply "zero" by the last residual. This might 547 not be exactly what we want to do here -could just return "zero". */ 548 549 *res = 0.0; 550 } 551 PetscFunctionReturn(0); 552 } 553 554 /* 555 KSPBuildSolution_PIPEFGMRES 556 557 Input Parameter: 558 . ksp - the Krylov space object 559 . ptr- 560 561 Output Parameter: 562 . result - the solution 563 564 Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle 565 calls directly. 566 567 */ 568 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result) 569 { 570 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 571 PetscErrorCode ierr; 572 573 PetscFunctionBegin; 574 if (!ptr) { 575 if (!pipefgmres->sol_temp) { 576 ierr = VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);CHKERRQ(ierr); 577 ierr = PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);CHKERRQ(ierr); 578 } 579 ptr = pipefgmres->sol_temp; 580 } 581 if (!pipefgmres->nrs) { 582 /* allocate the work area */ 583 ierr = PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);CHKERRQ(ierr); 584 ierr = PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));CHKERRQ(ierr); 585 } 586 587 ierr = KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);CHKERRQ(ierr); 588 if (result) *result = ptr; 589 PetscFunctionReturn(0); 590 } 591 592 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp) 593 { 594 PetscErrorCode ierr; 595 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 596 PetscBool flg; 597 PetscScalar shift; 598 599 PetscFunctionBegin; 600 ierr = KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);CHKERRQ(ierr); 601 ierr = PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");CHKERRQ(ierr); 602 ierr = PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);CHKERRQ(ierr); 603 if (flg) { ierr = KSPPIPEFGMRESSetShift(ksp,shift);CHKERRQ(ierr); } 604 ierr = PetscOptionsTail();CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer) 609 { 610 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 611 PetscErrorCode ierr; 612 PetscBool iascii,isstring; 613 614 PetscFunctionBegin; 615 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 616 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 617 618 if (iascii) { 619 ierr = PetscViewerASCIIPrintf(viewer," restart=%D\n",pipefgmres->max_k);CHKERRQ(ierr); 620 ierr = PetscViewerASCIIPrintf(viewer," happy breakdown tolerance %g\n",(double)pipefgmres->haptol);CHKERRQ(ierr); 621 #if defined(PETSC_USE_COMPLEX) 622 ierr = PetscViewerASCIIPrintf(viewer," shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr); 623 #else 624 ierr = PetscViewerASCIIPrintf(viewer," shift=%g\n",pipefgmres->shift);CHKERRQ(ierr); 625 #endif 626 } else if (isstring) { 627 ierr = PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);CHKERRQ(ierr); 628 #if defined(PETSC_USE_COMPLEX) 629 ierr = PetscViewerStringSPrintf(viewer," shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr); 630 #else 631 ierr = PetscViewerStringSPrintf(viewer," shift=%g\n",pipefgmres->shift);CHKERRQ(ierr); 632 #endif 633 } 634 PetscFunctionReturn(0); 635 } 636 637 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) 638 { 639 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 640 PetscErrorCode ierr; 641 PetscInt i; 642 643 PetscFunctionBegin; 644 ierr = PetscFree(pipefgmres->prevecs);CHKERRQ(ierr); 645 ierr = PetscFree(pipefgmres->zvecs);CHKERRQ(ierr); 646 for (i=0; i<pipefgmres->nwork_alloc; i++) { 647 ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);CHKERRQ(ierr); 648 ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);CHKERRQ(ierr); 649 } 650 ierr = PetscFree(pipefgmres->prevecs_user_work);CHKERRQ(ierr); 651 ierr = PetscFree(pipefgmres->zvecs_user_work);CHKERRQ(ierr); 652 ierr = PetscFree(pipefgmres->redux);CHKERRQ(ierr); 653 ierr = KSPReset_GMRES(ksp);CHKERRQ(ierr); 654 PetscFunctionReturn(0); 655 } 656 657 /*MC 658 KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method. 659 660 A flexible, 1-stage pipelined variant of GMRES. 661 662 Options Database Keys: 663 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 664 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 665 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of 666 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift() 667 vectors are allocated as needed) 668 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 669 670 671 Level: intermediate 672 673 Notes: 674 675 This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter. 676 677 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 678 679 Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES). 680 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 681 See the FAQ on the PETSc website for details. 682 683 Developer Notes: 684 This class is subclassed off of KSPGMRES. 685 686 Reference: 687 P. Sanan, S.M. Schnepp, and D.A. May, 688 "Pipelined, Flexible Krylov Subspace Methods," 689 SIAM Journal on Scientific Computing 2016 38:5, C441-C470, 690 DOI: 10.1137/15M1049130 691 692 .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES 693 KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEFGMRESSetShift() 694 M*/ 695 696 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) 697 { 698 KSP_PIPEFGMRES *pipefgmres; 699 PetscErrorCode ierr; 700 701 PetscFunctionBegin; 702 ierr = PetscNewLog(ksp,&pipefgmres);CHKERRQ(ierr); 703 704 ksp->data = (void*)pipefgmres; 705 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 706 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 707 ksp->ops->solve = KSPSolve_PIPEFGMRES; 708 ksp->ops->reset = KSPReset_PIPEFGMRES; 709 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 710 ksp->ops->view = KSPView_PIPEFGMRES; 711 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 712 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 713 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 714 715 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);CHKERRQ(ierr); 716 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);CHKERRQ(ierr); 717 718 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);CHKERRQ(ierr); 719 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);CHKERRQ(ierr); 720 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);CHKERRQ(ierr); 721 722 pipefgmres->nextra_vecs = 1; 723 pipefgmres->haptol = 1.0e-30; 724 pipefgmres->q_preallocate = 0; 725 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 726 pipefgmres->orthog = NULL; 727 pipefgmres->nrs = NULL; 728 pipefgmres->sol_temp = NULL; 729 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 730 pipefgmres->Rsvd = NULL; 731 pipefgmres->orthogwork = NULL; 732 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 733 pipefgmres->shift = 1.0; 734 PetscFunctionReturn(0); 735 } 736 737 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it) 738 { 739 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 740 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 741 PetscInt nalloc; /* number to allocate */ 742 PetscErrorCode ierr; 743 PetscInt k; 744 745 PetscFunctionBegin; 746 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 747 in a single chunk */ 748 749 /* Adjust the number to allocate to make sure that we don't exceed the 750 number of available slots (pipefgmres->vecs_allocated)*/ 751 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) { 752 nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; 753 } 754 if (!nalloc) PetscFunctionReturn(0); 755 756 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 757 758 /* work vectors */ 759 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);CHKERRQ(ierr); 760 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);CHKERRQ(ierr); 761 for (k=0; k < nalloc; k++) { 762 pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k]; 763 } 764 /* specify size of chunk allocated */ 765 pipefgmres->mwork_alloc[nwork] = nalloc; 766 767 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 768 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);CHKERRQ(ierr); 769 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);CHKERRQ(ierr); 770 for (k=0; k < nalloc; k++) { 771 pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k]; 772 } 773 774 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);CHKERRQ(ierr); 775 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);CHKERRQ(ierr); 776 for (k=0; k < nalloc; k++) { 777 pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k]; 778 } 779 780 /* increment the number of work vector chunks */ 781 pipefgmres->nwork_alloc++; 782 PetscFunctionReturn(0); 783 } 784 785 /*@ 786 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver. 787 788 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there). 789 790 Logically Collective on ksp 791 792 Input Parameters: 793 + ksp - the Krylov space context 794 - shift - the shift 795 796 Level: intermediate 797 798 Options Database: 799 . -ksp_pipefgmres_shift <shift> 800 801 .seealso: KSPComputeEigenvalues() 802 @*/ 803 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift) 804 { 805 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 806 807 PetscFunctionBegin; 808 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1); 809 PetscValidLogicalCollectiveScalar(ksp,shift,2); 810 pipefgmres->shift = shift; 811 PetscFunctionReturn(0); 812 } 813