1 /* 2 Contributed by Patrick Sanan and Sascha M. Schnepp 3 */ 4 5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 6 7 static PetscBool cited = PETSC_FALSE; 8 static const char citation[] = 9 "@article{SSM2016,\n" 10 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n" 11 " title = {Pipelined, Flexible Krylov Subspace Methods},\n" 12 " journal = {SIAM Journal on Scientific Computing},\n" 13 " volume = {38},\n" 14 " number = {5},\n" 15 " pages = {C441-C470},\n" 16 " year = {2016},\n" 17 " doi = {10.1137/15M1049130},\n" 18 " URL = {http://dx.doi.org/10.1137/15M1049130},\n" 19 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n" 20 "}\n"; 21 22 #define PIPEFGMRES_DELTA_DIRECTIONS 10 23 #define PIPEFGMRES_DEFAULT_MAXK 30 24 25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt); 26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*); 27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt); 28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 29 30 /* 31 32 KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres. 33 34 This is called once, usually automatically by KSPSolve() or KSPSetUp(), 35 but can be called directly by KSPSetUp(). 36 37 */ 38 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) 39 { 40 PetscErrorCode ierr; 41 PetscInt k; 42 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 43 const PetscInt max_k = pipefgmres->max_k; 44 45 PetscFunctionBegin; 46 ierr = KSPSetUp_GMRES(ksp);CHKERRQ(ierr); 47 48 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);CHKERRQ(ierr); 49 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);CHKERRQ(ierr); 50 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr); 51 52 ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);CHKERRQ(ierr); 53 ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);CHKERRQ(ierr); 54 for (k=0; k < pipefgmres->vv_allocated; k++) { 55 pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; 56 } 57 58 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);CHKERRQ(ierr); 59 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);CHKERRQ(ierr); 60 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr); 61 62 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);CHKERRQ(ierr); 63 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));CHKERRQ(ierr); 64 65 ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);CHKERRQ(ierr); 66 ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);CHKERRQ(ierr); 67 for (k=0; k < pipefgmres->vv_allocated; k++) { 68 pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; 69 } 70 71 PetscFunctionReturn(0); 72 } 73 74 /* 75 76 KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart. Return residual 77 history if requested. 78 79 input parameters: 80 . pipefgmres - structure containing parameters and work areas 81 82 output parameters: 83 . itcount - number of iterations used. If null, ignored. 84 . converged - 0 if not converged 85 86 Notes: 87 On entry, the value in vector VEC_VV(0) should be 88 the initial residual. 89 90 */ 91 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp) 92 { 93 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 94 PetscReal res_norm; 95 PetscReal hapbnd,tt; 96 PetscScalar *hh,*hes,*lhh,shift = pipefgmres->shift; 97 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 98 PetscErrorCode ierr; 99 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 100 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 101 PetscInt i,j,k; 102 Mat Amat,Pmat; 103 Vec Q,W; /* Pipelining vectors */ 104 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 105 106 PetscFunctionBegin; 107 if (itcount) *itcount = 0; 108 109 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 110 Q = VEC_Q; 111 W = VEC_W; 112 113 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 114 /* Note that we add an extra value here to allow for a single reduction */ 115 if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr); 116 } 117 lhh = pipefgmres->orthogwork; 118 119 /* Number of pseudo iterations since last restart is the number 120 of prestart directions */ 121 loc_it = 0; 122 123 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 124 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 125 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 126 (loc_it -1) is passed, so the two are equivalent */ 127 pipefgmres->it = (loc_it - 1); 128 129 /* initial residual is in VEC_VV(0) - compute its norm*/ 130 ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr); 131 132 /* first entry in right-hand-side of hessenberg system is just 133 the initial residual norm */ 134 *RS(0) = res_norm; 135 136 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 137 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 138 else ksp->rnorm = 0; 139 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 140 ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr); 141 ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr); 142 143 /* check for the convergence - maybe the current guess is good enough */ 144 ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 145 if (ksp->reason) { 146 if (itcount) *itcount = 0; 147 PetscFunctionReturn(0); 148 } 149 150 /* scale VEC_VV (the initial residual) */ 151 ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr); 152 153 /* Fill the pipeline */ 154 ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr); 155 ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); 156 ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr); 157 ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */ 158 159 /* MAIN ITERATION LOOP BEGINNING*/ 160 /* keep iterating until we have converged OR generated the max number 161 of directions OR reached the max number of iterations for the method */ 162 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 163 if (loc_it) { 164 ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); 165 ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); 166 } 167 pipefgmres->it = (loc_it - 1); 168 169 /* see if more space is needed for work vectors */ 170 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 171 ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr); 172 /* (loc_it+1) is passed in as number of the first vector that should 173 be allocated */ 174 } 175 176 /* Note that these inner products are with "Z" now, so 177 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 178 not the value from the equivalent FGMRES run (even in exact arithmetic) 179 That is, the H we need for the Arnoldi relation is different from the 180 coefficients we use in the orthogonalization process,because of the shift */ 181 182 /* Do some local twiddling to allow for a single reduction */ 183 for (i=0;i<loc_it+1;i++) { 184 redux[i] = VEC_VV(i); 185 } 186 redux[loc_it+1] = ZVEC(loc_it); 187 188 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 189 ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); 190 191 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 192 ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr); 193 194 /* The work to be overlapped with the inner products follows. 195 This is application of the preconditioner and the operator 196 to compute intermediate quantites which will be combined (locally) 197 with the results of the inner products. 198 */ 199 ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr); 200 ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); 201 ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr); 202 203 /* Compute inner products of the new direction with previous directions, 204 and the norm of the to-be-orthogonalized direction "Z". 205 This information is enough to build the required entries 206 of H. The inner product with VEC_VV(it_loc) is 207 *different* than in the standard FGMRES and need to be dealt with specially. 208 That is, for standard FGMRES the orthogonalization coefficients are the same 209 as the coefficients used in the Arnoldi relation to reconstruct, but here this 210 is not true (albeit only for the one entry of H which we "unshift" below. */ 211 212 /* Finish the dot product, retrieving the extra entry */ 213 ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); 214 tt = PetscRealPart(lhh[loc_it+1]); 215 216 /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt 217 Note that the Hessenberg entries require a shift, as these are for the 218 relation AU = VH, which is wrt unshifted basis vectors */ 219 hh = HH(0,loc_it); hes=HES(0,loc_it); 220 for (j=0; j<loc_it; j++) { 221 hh[j] = lhh[j]; 222 hes[j] = lhh[j]; 223 } 224 hh[loc_it] = lhh[loc_it] + shift; 225 hes[loc_it] = lhh[loc_it] + shift; 226 227 /* we delay applying the shift here */ 228 for (j=0; j<=loc_it; j++) { 229 lhh[j] = -lhh[j]; /* flip sign */ 230 } 231 232 /* Compute the norm of the un-normalized new direction using the rearranged formula 233 Note that these are shifted ("barred") quantities */ 234 for (k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 235 /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */ 236 if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0 ; 237 if (tt < 0.0) { 238 /* If we detect square root breakdown in the norm, we must restart the algorithm. 239 Here this means we simply break the current loop and reconstruct the solution 240 using the basis we have computed thus far. Note that by breaking immediately, 241 we do not update the iteration count, so computation done in this iteration 242 should be disregarded. 243 */ 244 ierr = PetscInfo2(ksp,"Restart due to square root breakdown at it = %D, tt=%g\n",ksp->its,(double)tt);CHKERRQ(ierr); 245 break; 246 } else { 247 tt = PetscSqrtReal(tt); 248 } 249 250 /* new entry in hessenburg is the 2-norm of our new direction */ 251 hh[loc_it+1] = tt; 252 hes[loc_it+1] = tt; 253 254 /* The recurred computation for the new direction 255 The division by tt is delayed to the happy breakdown check later 256 Note placement BEFORE the unshift 257 */ 258 ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr); 259 ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr); 260 /* (VEC_VV(loc_it+1) is not normalized yet) */ 261 262 /* The recurred computation for the preconditioned vector (u) */ 263 ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr); 264 ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr); 265 ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); 266 267 /* Unshift an entry in the GS coefficients ("removing the bar") */ 268 lhh[loc_it] -= shift; 269 270 /* The recurred computation for z (Au) 271 Note placement AFTER the "unshift" */ 272 ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr); 273 ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr); 274 ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); 275 276 /* Happy Breakdown Check */ 277 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 278 /* RS(loc_it) contains the res_norm from the last iteration */ 279 hapbnd = PetscMin(pipefgmres->haptol,hapbnd); 280 if (tt > hapbnd) { 281 /* scale new direction by its norm */ 282 ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr); 283 } else { 284 /* This happens when the solution is exactly reached. */ 285 /* So there is no new direction... */ 286 ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); /* set VEC_TEMP to 0 */ 287 hapend = PETSC_TRUE; 288 } 289 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 290 current solution would not be exact if HES was singular. Note that 291 HH non-singular implies that HES is not singular, and HES is guaranteed 292 to be nonsingular when PREVECS are linearly independent and A is 293 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 294 of HES). So we should really add a check to verify that HES is nonsingular.*/ 295 296 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 297 here to check that the hessenberg matrix is indeed non-singular (since 298 FGMRES does not guarantee this) */ 299 300 /* Now apply rotations to new col of hessenberg (and right side of system), 301 calculate new rotation, and get new residual norm at the same time*/ 302 ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr); 303 if (ksp->reason) break; 304 305 loc_it++; 306 pipefgmres->it = (loc_it-1); /* Add this here in case it has converged */ 307 308 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 309 ksp->its++; 310 if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm; 311 else ksp->rnorm = 0; 312 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 313 314 ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 315 316 /* Catch error in happy breakdown and signal convergence and break from loop */ 317 if (hapend) { 318 if (!ksp->reason) { 319 if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm); 320 else { 321 ksp->reason = KSP_DIVERGED_BREAKDOWN; 322 break; 323 } 324 } 325 } 326 } 327 /* END OF ITERATION LOOP */ 328 ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr); 329 330 /* 331 Monitor if we know that we will not return for a restart */ 332 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { 333 ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr); 334 } 335 336 if (itcount) *itcount = loc_it; 337 338 /* 339 Down here we have to solve for the "best" coefficients of the Krylov 340 columns, add the solution values together, and possibly unwind the 341 preconditioning from the solution 342 */ 343 344 /* Form the solution (or the solution so far) */ 345 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln 346 properly navigates */ 347 348 ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr); 349 350 PetscFunctionReturn(0); 351 } 352 353 /* 354 KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method. 355 356 Input Parameter: 357 . ksp - the Krylov space object that was set to use pipefgmres 358 359 Output Parameter: 360 . outits - number of iterations used 361 362 */ 363 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) 364 { 365 PetscErrorCode ierr; 366 PetscInt its,itcount; 367 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 368 PetscBool guess_zero = ksp->guess_zero; 369 370 PetscFunctionBegin; 371 /* We have not checked these routines for use with complex numbers. The inner products 372 are likely not defined correctly for that case */ 373 if (PetscDefined(USE_COMPLEX) && !PetscDefined(SKIP_COMPLEX)) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars"); 374 375 ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr); 376 377 if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 378 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 379 ksp->its = 0; 380 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 381 382 itcount = 0; 383 ksp->reason = KSP_CONVERGED_ITERATING; 384 while (!ksp->reason) { 385 ierr = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr); 386 ierr = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr); 387 itcount += its; 388 if (itcount >= ksp->max_it) { 389 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 390 break; 391 } 392 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 393 } 394 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 395 PetscFunctionReturn(0); 396 } 397 398 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) 399 { 400 PetscErrorCode ierr; 401 402 PetscFunctionBegin; 403 ierr = KSPReset_PIPEFGMRES(ksp);CHKERRQ(ierr); 404 ierr = KSPDestroy_GMRES(ksp);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 /* 409 KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the 410 current iterates. 411 412 Input parameters: 413 nrs - work area of size it + 1. 414 vguess - index of initial guess 415 vdest - index of result. Note that vguess may == vdest (replace 416 guess with the solution). 417 it - HH upper triangular part is a block of size (it+1) x (it+1) 418 419 This is an internal routine that knows about the PIPEFGMRES internals. 420 */ 421 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it) 422 { 423 PetscScalar tt; 424 PetscErrorCode ierr; 425 PetscInt k,j; 426 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 427 428 PetscFunctionBegin; 429 /* Solve for solution vector that minimizes the residual */ 430 431 if (it < 0) { /* no pipefgmres steps have been performed */ 432 ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */ 433 PetscFunctionReturn(0); 434 } 435 436 /* solve the upper triangular system - RS is the right side and HH is 437 the upper triangular matrix - put soln in nrs */ 438 if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it); 439 else nrs[it] = 0.0; 440 441 for (k=it-1; k>=0; k--) { 442 tt = *RS(k); 443 for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j]; 444 nrs[k] = tt / *HH(k,k); 445 } 446 447 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 448 ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr); 449 ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr); 450 451 /* add solution to previous solution */ 452 if (vdest == vguess) { 453 ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr); 454 } else { 455 ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr); 456 } 457 PetscFunctionReturn(0); 458 } 459 460 /* 461 462 KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization. 463 Return new residual. 464 465 input parameters: 466 467 . ksp - Krylov space object 468 . it - plane rotations are applied to the (it+1)th column of the 469 modified hessenberg (i.e. HH(:,it)) 470 . hapend - PETSC_FALSE not happy breakdown ending. 471 472 output parameters: 473 . res - the new residual 474 475 */ 476 /* 477 . it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this 478 */ 479 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res) 480 { 481 PetscScalar *hh,*cc,*ss,*rs; 482 PetscInt j; 483 PetscReal hapbnd; 484 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 485 PetscErrorCode ierr; 486 487 PetscFunctionBegin; 488 hh = HH(0,it); /* pointer to beginning of column to update */ 489 cc = CC(0); /* beginning of cosine rotations */ 490 ss = SS(0); /* beginning of sine rotations */ 491 rs = RS(0); /* right hand side of least squares system */ 492 493 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 494 for (j=0; j<=it+1; j++) *HES(j,it) = hh[j]; 495 496 /* check for the happy breakdown */ 497 hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol); 498 if (PetscAbsScalar(hh[it+1]) < hapbnd) { 499 ierr = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr); 500 *hapend = PETSC_TRUE; 501 } 502 503 /* Apply all the previously computed plane rotations to the new column 504 of the Hessenberg matrix */ 505 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 506 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 507 508 for (j=0; j<it; j++) { 509 PetscScalar hhj = hh[j]; 510 hh[j] = PetscConj(cc[j])*hhj + ss[j]*hh[j+1]; 511 hh[j+1] = -ss[j] *hhj + cc[j]*hh[j+1]; 512 } 513 514 /* 515 compute the new plane rotation, and apply it to: 516 1) the right-hand-side of the Hessenberg system (RS) 517 note: it affects RS(it) and RS(it+1) 518 2) the new column of the Hessenberg matrix 519 note: it affects HH(it,it) which is currently pointed to 520 by hh and HH(it+1, it) (*(hh+1)) 521 thus obtaining the updated value of the residual... 522 */ 523 524 /* compute new plane rotation */ 525 526 if (!*hapend) { 527 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1]))); 528 if (delta == 0.0) { 529 ksp->reason = KSP_DIVERGED_NULL; 530 PetscFunctionReturn(0); 531 } 532 533 cc[it] = hh[it] / delta; /* new cosine value */ 534 ss[it] = hh[it+1] / delta; /* new sine value */ 535 536 hh[it] = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1]; 537 rs[it+1] = -ss[it]*rs[it]; 538 rs[it] = PetscConj(cc[it])*rs[it]; 539 *res = PetscAbsScalar(rs[it+1]); 540 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 541 another rotation matrix (so RH doesn't change). The new residual is 542 always the new sine term times the residual from last time (RS(it)), 543 but now the new sine rotation would be zero...so the residual should 544 be zero...so we will multiply "zero" by the last residual. This might 545 not be exactly what we want to do here -could just return "zero". */ 546 547 *res = 0.0; 548 } 549 PetscFunctionReturn(0); 550 } 551 552 /* 553 KSPBuildSolution_PIPEFGMRES 554 555 Input Parameter: 556 . ksp - the Krylov space object 557 . ptr- 558 559 Output Parameter: 560 . result - the solution 561 562 Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle 563 calls directly. 564 565 */ 566 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result) 567 { 568 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 569 PetscErrorCode ierr; 570 571 PetscFunctionBegin; 572 if (!ptr) { 573 if (!pipefgmres->sol_temp) { 574 ierr = VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);CHKERRQ(ierr); 575 ierr = PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);CHKERRQ(ierr); 576 } 577 ptr = pipefgmres->sol_temp; 578 } 579 if (!pipefgmres->nrs) { 580 /* allocate the work area */ 581 ierr = PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);CHKERRQ(ierr); 582 ierr = PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));CHKERRQ(ierr); 583 } 584 585 ierr = KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);CHKERRQ(ierr); 586 if (result) *result = ptr; 587 PetscFunctionReturn(0); 588 } 589 590 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp) 591 { 592 PetscErrorCode ierr; 593 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 594 PetscBool flg; 595 PetscScalar shift; 596 597 PetscFunctionBegin; 598 ierr = KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);CHKERRQ(ierr); 599 ierr = PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");CHKERRQ(ierr); 600 ierr = PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);CHKERRQ(ierr); 601 if (flg) { ierr = KSPPIPEFGMRESSetShift(ksp,shift);CHKERRQ(ierr); } 602 ierr = PetscOptionsTail();CHKERRQ(ierr); 603 PetscFunctionReturn(0); 604 } 605 606 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer) 607 { 608 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 609 PetscErrorCode ierr; 610 PetscBool iascii,isstring; 611 612 PetscFunctionBegin; 613 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 614 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 615 616 if (iascii) { 617 ierr = PetscViewerASCIIPrintf(viewer," restart=%D\n",pipefgmres->max_k);CHKERRQ(ierr); 618 ierr = PetscViewerASCIIPrintf(viewer," happy breakdown tolerance %g\n",(double)pipefgmres->haptol);CHKERRQ(ierr); 619 #if defined(PETSC_USE_COMPLEX) 620 ierr = PetscViewerASCIIPrintf(viewer," shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr); 621 #else 622 ierr = PetscViewerASCIIPrintf(viewer," shift=%g\n",pipefgmres->shift);CHKERRQ(ierr); 623 #endif 624 } else if (isstring) { 625 ierr = PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);CHKERRQ(ierr); 626 #if defined(PETSC_USE_COMPLEX) 627 ierr = PetscViewerStringSPrintf(viewer," shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr); 628 #else 629 ierr = PetscViewerStringSPrintf(viewer," shift=%g\n",pipefgmres->shift);CHKERRQ(ierr); 630 #endif 631 } 632 PetscFunctionReturn(0); 633 } 634 635 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) 636 { 637 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 638 PetscErrorCode ierr; 639 PetscInt i; 640 641 PetscFunctionBegin; 642 ierr = PetscFree(pipefgmres->prevecs);CHKERRQ(ierr); 643 ierr = PetscFree(pipefgmres->zvecs);CHKERRQ(ierr); 644 for (i=0; i<pipefgmres->nwork_alloc; i++) { 645 ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);CHKERRQ(ierr); 646 ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);CHKERRQ(ierr); 647 } 648 ierr = PetscFree(pipefgmres->prevecs_user_work);CHKERRQ(ierr); 649 ierr = PetscFree(pipefgmres->zvecs_user_work);CHKERRQ(ierr); 650 ierr = PetscFree(pipefgmres->redux);CHKERRQ(ierr); 651 ierr = KSPReset_GMRES(ksp);CHKERRQ(ierr); 652 PetscFunctionReturn(0); 653 } 654 655 /*MC 656 KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method. 657 658 A flexible, 1-stage pipelined variant of GMRES. 659 660 Options Database Keys: 661 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 662 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 663 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of 664 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift() 665 vectors are allocated as needed) 666 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 667 668 Level: intermediate 669 670 Notes: 671 672 This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter. 673 674 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 675 676 Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES). 677 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 678 See the FAQ on the PETSc website for details. 679 680 Developer Notes: 681 This class is subclassed off of KSPGMRES. 682 683 Reference: 684 P. Sanan, S.M. Schnepp, and D.A. May, 685 "Pipelined, Flexible Krylov Subspace Methods," 686 SIAM Journal on Scientific Computing 2016 38:5, C441-C470, 687 DOI: 10.1137/15M1049130 688 689 .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES 690 KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEFGMRESSetShift() 691 M*/ 692 693 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) 694 { 695 KSP_PIPEFGMRES *pipefgmres; 696 PetscErrorCode ierr; 697 698 PetscFunctionBegin; 699 ierr = PetscNewLog(ksp,&pipefgmres);CHKERRQ(ierr); 700 701 ksp->data = (void*)pipefgmres; 702 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 703 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 704 ksp->ops->solve = KSPSolve_PIPEFGMRES; 705 ksp->ops->reset = KSPReset_PIPEFGMRES; 706 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 707 ksp->ops->view = KSPView_PIPEFGMRES; 708 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 709 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 710 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 711 712 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);CHKERRQ(ierr); 713 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);CHKERRQ(ierr); 714 715 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);CHKERRQ(ierr); 716 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);CHKERRQ(ierr); 717 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);CHKERRQ(ierr); 718 719 pipefgmres->nextra_vecs = 1; 720 pipefgmres->haptol = 1.0e-30; 721 pipefgmres->q_preallocate = 0; 722 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 723 pipefgmres->orthog = NULL; 724 pipefgmres->nrs = NULL; 725 pipefgmres->sol_temp = NULL; 726 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 727 pipefgmres->Rsvd = NULL; 728 pipefgmres->orthogwork = NULL; 729 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 730 pipefgmres->shift = 1.0; 731 PetscFunctionReturn(0); 732 } 733 734 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it) 735 { 736 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 737 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 738 PetscInt nalloc; /* number to allocate */ 739 PetscErrorCode ierr; 740 PetscInt k; 741 742 PetscFunctionBegin; 743 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 744 in a single chunk */ 745 746 /* Adjust the number to allocate to make sure that we don't exceed the 747 number of available slots (pipefgmres->vecs_allocated)*/ 748 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) { 749 nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; 750 } 751 if (!nalloc) PetscFunctionReturn(0); 752 753 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 754 755 /* work vectors */ 756 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);CHKERRQ(ierr); 757 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);CHKERRQ(ierr); 758 for (k=0; k < nalloc; k++) { 759 pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k]; 760 } 761 /* specify size of chunk allocated */ 762 pipefgmres->mwork_alloc[nwork] = nalloc; 763 764 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 765 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);CHKERRQ(ierr); 766 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);CHKERRQ(ierr); 767 for (k=0; k < nalloc; k++) { 768 pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k]; 769 } 770 771 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);CHKERRQ(ierr); 772 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);CHKERRQ(ierr); 773 for (k=0; k < nalloc; k++) { 774 pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k]; 775 } 776 777 /* increment the number of work vector chunks */ 778 pipefgmres->nwork_alloc++; 779 PetscFunctionReturn(0); 780 } 781 782 /*@ 783 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver. 784 785 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there). 786 787 Logically Collective on ksp 788 789 Input Parameters: 790 + ksp - the Krylov space context 791 - shift - the shift 792 793 Level: intermediate 794 795 Options Database: 796 . -ksp_pipefgmres_shift <shift> 797 798 .seealso: KSPComputeEigenvalues() 799 @*/ 800 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift) 801 { 802 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 803 804 PetscFunctionBegin; 805 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1); 806 PetscValidLogicalCollectiveScalar(ksp,shift,2); 807 pipefgmres->shift = shift; 808 PetscFunctionReturn(0); 809 } 810