1 /* 2 Contributed by Patrick Sanan and Sascha M. Schnepp 3 */ 4 5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 6 7 static PetscBool cited = PETSC_FALSE; 8 static const char citation[] = 9 "@article{SSM2016,\n" 10 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n" 11 " title = {Pipelined, Flexible Krylov Subspace Methods},\n" 12 " journal = {SIAM Journal on Scientific Computing},\n" 13 " volume = {38},\n" 14 " number = {5},\n" 15 " pages = {C441-C470},\n" 16 " year = {2016},\n" 17 " doi = {10.1137/15M1049130},\n" 18 " URL = {http://dx.doi.org/10.1137/15M1049130},\n" 19 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n" 20 "}\n"; 21 22 #define PIPEFGMRES_DELTA_DIRECTIONS 10 23 #define PIPEFGMRES_DEFAULT_MAXK 30 24 25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt); 26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*); 27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt); 28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 29 30 /* 31 32 KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres. 33 34 This is called once, usually automatically by KSPSolve() or KSPSetUp(), 35 but can be called directly by KSPSetUp(). 36 37 */ 38 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) 39 { 40 PetscErrorCode ierr; 41 PetscInt k; 42 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 43 const PetscInt max_k = pipefgmres->max_k; 44 45 PetscFunctionBegin; 46 ierr = KSPSetUp_GMRES(ksp);CHKERRQ(ierr); 47 48 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);CHKERRQ(ierr); 49 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);CHKERRQ(ierr); 50 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr); 51 52 ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);CHKERRQ(ierr); 53 ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);CHKERRQ(ierr); 54 for (k=0; k < pipefgmres->vv_allocated; k++) { 55 pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; 56 } 57 58 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);CHKERRQ(ierr); 59 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);CHKERRQ(ierr); 60 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr); 61 62 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);CHKERRQ(ierr); 63 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));CHKERRQ(ierr); 64 65 ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);CHKERRQ(ierr); 66 ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);CHKERRQ(ierr); 67 for (k=0; k < pipefgmres->vv_allocated; k++) { 68 pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; 69 } 70 71 PetscFunctionReturn(0); 72 } 73 74 /* 75 76 KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart. Return residual 77 history if requested. 78 79 input parameters: 80 . pipefgmres - structure containing parameters and work areas 81 82 output parameters: 83 . itcount - number of iterations used. If null, ignored. 84 . converged - 0 if not converged 85 86 Notes: 87 On entry, the value in vector VEC_VV(0) should be 88 the initial residual. 89 90 91 */ 92 93 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp) 94 { 95 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 96 PetscReal res_norm; 97 PetscReal hapbnd,tt; 98 PetscScalar *hh,*hes,*lhh,shift = pipefgmres->shift; 99 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 100 PetscErrorCode ierr; 101 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 102 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 103 PetscInt i,j,k; 104 Mat Amat,Pmat; 105 Vec Q,W; /* Pipelining vectors */ 106 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 107 108 PetscFunctionBegin; 109 if (itcount) *itcount = 0; 110 111 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 112 Q = VEC_Q; 113 W = VEC_W; 114 115 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 116 /* Note that we add an extra value here to allow for a single reduction */ 117 if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr); 118 } 119 lhh = pipefgmres->orthogwork; 120 121 /* Number of pseudo iterations since last restart is the number 122 of prestart directions */ 123 loc_it = 0; 124 125 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 126 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 127 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 128 (loc_it -1) is passed, so the two are equivalent */ 129 pipefgmres->it = (loc_it - 1); 130 131 /* initial residual is in VEC_VV(0) - compute its norm*/ 132 ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr); 133 134 /* first entry in right-hand-side of hessenberg system is just 135 the initial residual norm */ 136 *RS(0) = res_norm; 137 138 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 139 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 140 else ksp->rnorm = 0; 141 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 142 ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr); 143 ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr); 144 145 /* check for the convergence - maybe the current guess is good enough */ 146 ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 147 if (ksp->reason) { 148 if (itcount) *itcount = 0; 149 PetscFunctionReturn(0); 150 } 151 152 /* scale VEC_VV (the initial residual) */ 153 ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr); 154 155 /* Fill the pipeline */ 156 ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr); 157 ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); 158 ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr); 159 ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */ 160 161 /* MAIN ITERATION LOOP BEGINNING*/ 162 /* keep iterating until we have converged OR generated the max number 163 of directions OR reached the max number of iterations for the method */ 164 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 165 if (loc_it) { 166 ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); 167 ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); 168 } 169 pipefgmres->it = (loc_it - 1); 170 171 /* see if more space is needed for work vectors */ 172 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 173 ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr); 174 /* (loc_it+1) is passed in as number of the first vector that should 175 be allocated */ 176 } 177 178 /* Note that these inner products are with "Z" now, so 179 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 180 not the value from the equivalent FGMRES run (even in exact arithmetic) 181 That is, the H we need for the Arnoldi relation is different from the 182 coefficients we use in the orthogonalization process,because of the shift */ 183 184 /* Do some local twiddling to allow for a single reduction */ 185 for(i=0;i<loc_it+1;i++){ 186 redux[i] = VEC_VV(i); 187 } 188 redux[loc_it+1] = ZVEC(loc_it); 189 190 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 191 ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); 192 193 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 194 ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr); 195 196 /* The work to be overlapped with the inner products follows. 197 This is application of the preconditioner and the operator 198 to compute intermediate quantites which will be combined (locally) 199 with the results of the inner products. 200 */ 201 ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr); 202 ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); 203 ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr); 204 205 /* Compute inner products of the new direction with previous directions, 206 and the norm of the to-be-orthogonalized direction "Z". 207 This information is enough to build the required entries 208 of H. The inner product with VEC_VV(it_loc) is 209 *different* than in the standard FGMRES and need to be dealt with specially. 210 That is, for standard FGMRES the orthogonalization coefficients are the same 211 as the coefficients used in the Arnoldi relation to reconstruct, but here this 212 is not true (albeit only for the one entry of H which we "unshift" below. */ 213 214 /* Finish the dot product, retrieving the extra entry */ 215 ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); 216 tt = PetscRealPart(lhh[loc_it+1]); 217 218 /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt 219 Note that the Hessenberg entries require a shift, as these are for the 220 relation AU = VH, which is wrt unshifted basis vectors */ 221 hh = HH(0,loc_it); hes=HES(0,loc_it); 222 for (j=0; j<loc_it; j++) { 223 hh[j] = lhh[j]; 224 hes[j] = lhh[j]; 225 } 226 hh[loc_it] = lhh[loc_it] + shift; 227 hes[loc_it] = lhh[loc_it] + shift; 228 229 /* we delay applying the shift here */ 230 for (j=0; j<=loc_it; j++) { 231 lhh[j] = -lhh[j]; /* flip sign */ 232 } 233 234 /* Compute the norm of the un-normalized new direction using the rearranged formula 235 Note that these are shifted ("barred") quantities */ 236 for(k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 237 /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */ 238 if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0 ; 239 if (tt < 0.0) { 240 /* If we detect square root breakdown in the norm, we must restart the algorithm. 241 Here this means we simply break the current loop and reconstruct the solution 242 using the basis we have computed thus far. Note that by breaking immediately, 243 we do not update the iteration count, so computation done in this iteration 244 should be disregarded. 245 */ 246 ierr = PetscInfo2(ksp,"Restart due to square root breakdown at it = %D, tt=%g\n",ksp->its,(double)tt);CHKERRQ(ierr); 247 break; 248 } else { 249 tt = PetscSqrtReal(tt); 250 } 251 252 /* new entry in hessenburg is the 2-norm of our new direction */ 253 hh[loc_it+1] = tt; 254 hes[loc_it+1] = tt; 255 256 /* The recurred computation for the new direction 257 The division by tt is delayed to the happy breakdown check later 258 Note placement BEFORE the unshift 259 */ 260 ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr); 261 ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr); 262 /* (VEC_VV(loc_it+1) is not normalized yet) */ 263 264 /* The recurred computation for the preconditioned vector (u) */ 265 ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr); 266 ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr); 267 ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); 268 269 /* Unshift an entry in the GS coefficients ("removing the bar") */ 270 lhh[loc_it] -= shift; 271 272 /* The recurred computation for z (Au) 273 Note placement AFTER the "unshift" */ 274 ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr); 275 ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr); 276 ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); 277 278 /* Happy Breakdown Check */ 279 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 280 /* RS(loc_it) contains the res_norm from the last iteration */ 281 hapbnd = PetscMin(pipefgmres->haptol,hapbnd); 282 if (tt > hapbnd) { 283 /* scale new direction by its norm */ 284 ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr); 285 } else { 286 /* This happens when the solution is exactly reached. */ 287 /* So there is no new direction... */ 288 ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); /* set VEC_TEMP to 0 */ 289 hapend = PETSC_TRUE; 290 } 291 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 292 current solution would not be exact if HES was singular. Note that 293 HH non-singular implies that HES is not singular, and HES is guaranteed 294 to be nonsingular when PREVECS are linearly independent and A is 295 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 296 of HES). So we should really add a check to verify that HES is nonsingular.*/ 297 298 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 299 here to check that the hessenberg matrix is indeed non-singular (since 300 FGMRES does not guarantee this) */ 301 302 /* Now apply rotations to new col of hessenberg (and right side of system), 303 calculate new rotation, and get new residual norm at the same time*/ 304 ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr); 305 if (ksp->reason) break; 306 307 loc_it++; 308 pipefgmres->it = (loc_it-1); /* Add this here in case it has converged */ 309 310 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 311 ksp->its++; 312 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 313 else ksp->rnorm = 0; 314 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 315 316 ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 317 318 /* Catch error in happy breakdown and signal convergence and break from loop */ 319 if (hapend) { 320 if (!ksp->reason) { 321 if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm); 322 else { 323 ksp->reason = KSP_DIVERGED_BREAKDOWN; 324 break; 325 } 326 } 327 } 328 } 329 /* END OF ITERATION LOOP */ 330 ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr); 331 332 /* 333 Monitor if we know that we will not return for a restart */ 334 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { 335 ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr); 336 } 337 338 if (itcount) *itcount = loc_it; 339 340 /* 341 Down here we have to solve for the "best" coefficients of the Krylov 342 columns, add the solution values together, and possibly unwind the 343 preconditioning from the solution 344 */ 345 346 /* Form the solution (or the solution so far) */ 347 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln 348 properly navigates */ 349 350 ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr); 351 352 PetscFunctionReturn(0); 353 } 354 355 /* 356 KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method. 357 358 359 Input Parameter: 360 . ksp - the Krylov space object that was set to use pipefgmres 361 362 Output Parameter: 363 . outits - number of iterations used 364 365 */ 366 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) 367 { 368 PetscErrorCode ierr; 369 PetscInt its,itcount; 370 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 371 PetscBool guess_zero = ksp->guess_zero; 372 373 PetscFunctionBegin; 374 375 /* We have not checked these routines for use with complex numbers. The inner products 376 are likely not defined correctly for that case */ 377 #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX)) 378 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars"); 379 #endif 380 381 ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr); 382 383 if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 384 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 385 ksp->its = 0; 386 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 387 388 itcount = 0; 389 ksp->reason = KSP_CONVERGED_ITERATING; 390 while (!ksp->reason) { 391 ierr = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr); 392 ierr = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr); 393 itcount += its; 394 if (itcount >= ksp->max_it) { 395 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 396 break; 397 } 398 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 399 } 400 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 401 PetscFunctionReturn(0); 402 } 403 404 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) 405 { 406 PetscErrorCode ierr; 407 408 PetscFunctionBegin; 409 ierr = KSPReset_PIPEFGMRES(ksp);CHKERRQ(ierr); 410 ierr = KSPDestroy_GMRES(ksp);CHKERRQ(ierr); 411 PetscFunctionReturn(0); 412 } 413 414 /* 415 KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the 416 current iterates. 417 418 Input parameters: 419 nrs - work area of size it + 1. 420 vguess - index of initial guess 421 vdest - index of result. Note that vguess may == vdest (replace 422 guess with the solution). 423 it - HH upper triangular part is a block of size (it+1) x (it+1) 424 425 This is an internal routine that knows about the PIPEFGMRES internals. 426 */ 427 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it) 428 { 429 PetscScalar tt; 430 PetscErrorCode ierr; 431 PetscInt k,j; 432 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 433 434 PetscFunctionBegin; 435 /* Solve for solution vector that minimizes the residual */ 436 437 if (it < 0) { /* no pipefgmres steps have been performed */ 438 ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */ 439 PetscFunctionReturn(0); 440 } 441 442 /* solve the upper triangular system - RS is the right side and HH is 443 the upper triangular matrix - put soln in nrs */ 444 if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it); 445 else nrs[it] = 0.0; 446 447 for (k=it-1; k>=0; k--) { 448 tt = *RS(k); 449 for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j]; 450 nrs[k] = tt / *HH(k,k); 451 } 452 453 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 454 ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr); 455 ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr); 456 457 /* add solution to previous solution */ 458 if (vdest == vguess) { 459 ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr); 460 } else { 461 ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr); 462 } 463 PetscFunctionReturn(0); 464 } 465 466 /* 467 468 KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization. 469 Return new residual. 470 471 input parameters: 472 473 . ksp - Krylov space object 474 . it - plane rotations are applied to the (it+1)th column of the 475 modified hessenberg (i.e. HH(:,it)) 476 . hapend - PETSC_FALSE not happy breakdown ending. 477 478 output parameters: 479 . res - the new residual 480 481 */ 482 /* 483 . it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this 484 */ 485 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res) 486 { 487 PetscScalar *hh,*cc,*ss,*rs; 488 PetscInt j; 489 PetscReal hapbnd; 490 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 491 PetscErrorCode ierr; 492 493 PetscFunctionBegin; 494 hh = HH(0,it); /* pointer to beginning of column to update */ 495 cc = CC(0); /* beginning of cosine rotations */ 496 ss = SS(0); /* beginning of sine rotations */ 497 rs = RS(0); /* right hand side of least squares system */ 498 499 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 500 for (j=0; j<=it+1; j++) *HES(j,it) = hh[j]; 501 502 /* check for the happy breakdown */ 503 hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol); 504 if (PetscAbsScalar(hh[it+1]) < hapbnd) { 505 ierr = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr); 506 *hapend = PETSC_TRUE; 507 } 508 509 /* Apply all the previously computed plane rotations to the new column 510 of the Hessenberg matrix */ 511 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 512 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 513 514 for (j=0; j<it; j++) { 515 PetscScalar hhj = hh[j]; 516 hh[j] = PetscConj(cc[j])*hhj + ss[j]*hh[j+1]; 517 hh[j+1] = -ss[j] *hhj + cc[j]*hh[j+1]; 518 } 519 520 /* 521 compute the new plane rotation, and apply it to: 522 1) the right-hand-side of the Hessenberg system (RS) 523 note: it affects RS(it) and RS(it+1) 524 2) the new column of the Hessenberg matrix 525 note: it affects HH(it,it) which is currently pointed to 526 by hh and HH(it+1, it) (*(hh+1)) 527 thus obtaining the updated value of the residual... 528 */ 529 530 /* compute new plane rotation */ 531 532 if (!*hapend) { 533 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1]))); 534 if (delta == 0.0) { 535 ksp->reason = KSP_DIVERGED_NULL; 536 PetscFunctionReturn(0); 537 } 538 539 cc[it] = hh[it] / delta; /* new cosine value */ 540 ss[it] = hh[it+1] / delta; /* new sine value */ 541 542 hh[it] = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1]; 543 rs[it+1] = -ss[it]*rs[it]; 544 rs[it] = PetscConj(cc[it])*rs[it]; 545 *res = PetscAbsScalar(rs[it+1]); 546 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 547 another rotation matrix (so RH doesn't change). The new residual is 548 always the new sine term times the residual from last time (RS(it)), 549 but now the new sine rotation would be zero...so the residual should 550 be zero...so we will multiply "zero" by the last residual. This might 551 not be exactly what we want to do here -could just return "zero". */ 552 553 *res = 0.0; 554 } 555 PetscFunctionReturn(0); 556 } 557 558 /* 559 KSPBuildSolution_PIPEFGMRES 560 561 Input Parameter: 562 . ksp - the Krylov space object 563 . ptr- 564 565 Output Parameter: 566 . result - the solution 567 568 Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle 569 calls directly. 570 571 */ 572 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result) 573 { 574 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 575 PetscErrorCode ierr; 576 577 PetscFunctionBegin; 578 if (!ptr) { 579 if (!pipefgmres->sol_temp) { 580 ierr = VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);CHKERRQ(ierr); 581 ierr = PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);CHKERRQ(ierr); 582 } 583 ptr = pipefgmres->sol_temp; 584 } 585 if (!pipefgmres->nrs) { 586 /* allocate the work area */ 587 ierr = PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);CHKERRQ(ierr); 588 ierr = PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));CHKERRQ(ierr); 589 } 590 591 ierr = KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);CHKERRQ(ierr); 592 if (result) *result = ptr; 593 PetscFunctionReturn(0); 594 } 595 596 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp) 597 { 598 PetscErrorCode ierr; 599 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 600 PetscBool flg; 601 PetscScalar shift; 602 603 PetscFunctionBegin; 604 ierr = KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);CHKERRQ(ierr); 605 ierr = PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");CHKERRQ(ierr); 606 ierr = PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);CHKERRQ(ierr); 607 if (flg) { ierr = KSPPIPEFGMRESSetShift(ksp,shift);CHKERRQ(ierr); } 608 ierr = PetscOptionsTail();CHKERRQ(ierr); 609 PetscFunctionReturn(0); 610 } 611 612 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer) 613 { 614 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 615 PetscErrorCode ierr; 616 PetscBool iascii,isstring; 617 618 PetscFunctionBegin; 619 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 620 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 621 622 if (iascii) { 623 ierr = PetscViewerASCIIPrintf(viewer," restart=%D\n",pipefgmres->max_k);CHKERRQ(ierr); 624 ierr = PetscViewerASCIIPrintf(viewer," happy breakdown tolerance %g\n",(double)pipefgmres->haptol);CHKERRQ(ierr); 625 #if defined(PETSC_USE_COMPLEX) 626 ierr = PetscViewerASCIIPrintf(viewer," shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr); 627 #else 628 ierr = PetscViewerASCIIPrintf(viewer," shift=%g\n",pipefgmres->shift);CHKERRQ(ierr); 629 #endif 630 } else if (isstring) { 631 ierr = PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);CHKERRQ(ierr); 632 #if defined(PETSC_USE_COMPLEX) 633 ierr = PetscViewerStringSPrintf(viewer," shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr); 634 #else 635 ierr = PetscViewerStringSPrintf(viewer," shift=%g\n",pipefgmres->shift);CHKERRQ(ierr); 636 #endif 637 } 638 PetscFunctionReturn(0); 639 } 640 641 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) 642 { 643 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 644 PetscErrorCode ierr; 645 PetscInt i; 646 647 PetscFunctionBegin; 648 ierr = PetscFree(pipefgmres->prevecs);CHKERRQ(ierr); 649 ierr = PetscFree(pipefgmres->zvecs);CHKERRQ(ierr); 650 for (i=0; i<pipefgmres->nwork_alloc; i++) { 651 ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);CHKERRQ(ierr); 652 ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);CHKERRQ(ierr); 653 } 654 ierr = PetscFree(pipefgmres->prevecs_user_work);CHKERRQ(ierr); 655 ierr = PetscFree(pipefgmres->zvecs_user_work);CHKERRQ(ierr); 656 ierr = PetscFree(pipefgmres->redux);CHKERRQ(ierr); 657 ierr = KSPReset_GMRES(ksp);CHKERRQ(ierr); 658 PetscFunctionReturn(0); 659 } 660 661 /*MC 662 KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method. 663 664 A flexible, 1-stage pipelined variant of GMRES. 665 666 Options Database Keys: 667 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 668 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 669 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of 670 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift() 671 vectors are allocated as needed) 672 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 673 674 675 Level: intermediate 676 677 Notes: 678 679 This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter. 680 681 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 682 683 Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES). 684 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 685 See the FAQ on the PETSc website for details. 686 687 Developer Notes: 688 This class is subclassed off of KSPGMRES. 689 690 Reference: 691 P. Sanan, S.M. Schnepp, and D.A. May, 692 "Pipelined, Flexible Krylov Subspace Methods," 693 SIAM Journal on Scientific Computing 2016 38:5, C441-C470, 694 DOI: 10.1137/15M1049130 695 696 .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES 697 KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEFGMRESSetShift() 698 M*/ 699 700 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) 701 { 702 KSP_PIPEFGMRES *pipefgmres; 703 PetscErrorCode ierr; 704 705 PetscFunctionBegin; 706 ierr = PetscNewLog(ksp,&pipefgmres);CHKERRQ(ierr); 707 708 ksp->data = (void*)pipefgmres; 709 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 710 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 711 ksp->ops->solve = KSPSolve_PIPEFGMRES; 712 ksp->ops->reset = KSPReset_PIPEFGMRES; 713 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 714 ksp->ops->view = KSPView_PIPEFGMRES; 715 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 716 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 717 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 718 719 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);CHKERRQ(ierr); 720 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);CHKERRQ(ierr); 721 722 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);CHKERRQ(ierr); 723 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);CHKERRQ(ierr); 724 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);CHKERRQ(ierr); 725 726 pipefgmres->nextra_vecs = 1; 727 pipefgmres->haptol = 1.0e-30; 728 pipefgmres->q_preallocate = 0; 729 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 730 pipefgmres->orthog = NULL; 731 pipefgmres->nrs = NULL; 732 pipefgmres->sol_temp = NULL; 733 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 734 pipefgmres->Rsvd = NULL; 735 pipefgmres->orthogwork = NULL; 736 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 737 pipefgmres->shift = 1.0; 738 PetscFunctionReturn(0); 739 } 740 741 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it) 742 { 743 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 744 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 745 PetscInt nalloc; /* number to allocate */ 746 PetscErrorCode ierr; 747 PetscInt k; 748 749 PetscFunctionBegin; 750 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 751 in a single chunk */ 752 753 /* Adjust the number to allocate to make sure that we don't exceed the 754 number of available slots (pipefgmres->vecs_allocated)*/ 755 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) { 756 nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; 757 } 758 if (!nalloc) PetscFunctionReturn(0); 759 760 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 761 762 /* work vectors */ 763 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);CHKERRQ(ierr); 764 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);CHKERRQ(ierr); 765 for (k=0; k < nalloc; k++) { 766 pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k]; 767 } 768 /* specify size of chunk allocated */ 769 pipefgmres->mwork_alloc[nwork] = nalloc; 770 771 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 772 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);CHKERRQ(ierr); 773 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);CHKERRQ(ierr); 774 for (k=0; k < nalloc; k++) { 775 pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k]; 776 } 777 778 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);CHKERRQ(ierr); 779 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);CHKERRQ(ierr); 780 for (k=0; k < nalloc; k++) { 781 pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k]; 782 } 783 784 /* increment the number of work vector chunks */ 785 pipefgmres->nwork_alloc++; 786 PetscFunctionReturn(0); 787 } 788 /*@ 789 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver. 790 791 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there). 792 793 Logically Collective on ksp 794 795 Input Parameters: 796 + ksp - the Krylov space context 797 - shift - the shift 798 799 Level: intermediate 800 801 Options Database: 802 . -ksp_pipefgmres_shift <shift> 803 804 .seealso: KSPComputeEigenvalues() 805 @*/ 806 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift) 807 { 808 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 809 810 PetscFunctionBegin; 811 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1); 812 PetscValidLogicalCollectiveScalar(ksp,shift,2); 813 pipefgmres->shift = shift; 814 PetscFunctionReturn(0); 815 } 816