1 /* 2 Contributed by Patrick Sanan and Sascha M. Schnepp 3 */ 4 5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 6 7 static PetscBool cited = PETSC_FALSE; 8 static const char citation[] = 9 "@article{SSM2016,\n" 10 " author = {P. Sanan and S.M. Schnepp and D.A. May},\n" 11 " title = {Pipelined, Flexible Krylov Subspace Methods},\n" 12 " journal = {SIAM Journal on Scientific Computing},\n" 13 " volume = {38},\n" 14 " number = {5},\n" 15 " pages = {C441-C470},\n" 16 " year = {2016},\n" 17 " doi = {10.1137/15M1049130},\n" 18 " URL = {http://dx.doi.org/10.1137/15M1049130},\n" 19 " eprint = {http://dx.doi.org/10.1137/15M1049130}\n" 20 "}\n"; 21 22 #define PIPEFGMRES_DELTA_DIRECTIONS 10 23 #define PIPEFGMRES_DEFAULT_MAXK 30 24 25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt); 26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*); 27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt); 28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 29 30 /* 31 32 KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres. 33 34 This is called once, usually automatically by KSPSolve() or KSPSetUp(), 35 but can be called directly by KSPSetUp(). 36 37 */ 38 #undef __FUNCT__ 39 #define __FUNCT__ "KSPSetUp_PIPEFGMRES" 40 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) 41 { 42 PetscErrorCode ierr; 43 PetscInt k; 44 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 45 const PetscInt max_k = pipefgmres->max_k; 46 47 PetscFunctionBegin; 48 ierr = KSPSetUp_GMRES(ksp);CHKERRQ(ierr); 49 50 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);CHKERRQ(ierr); 51 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);CHKERRQ(ierr); 52 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr); 53 54 ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);CHKERRQ(ierr); 55 ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);CHKERRQ(ierr); 56 for (k=0; k < pipefgmres->vv_allocated; k++) { 57 pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; 58 } 59 60 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);CHKERRQ(ierr); 61 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);CHKERRQ(ierr); 62 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr); 63 64 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);CHKERRQ(ierr); 65 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));CHKERRQ(ierr); 66 67 ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);CHKERRQ(ierr); 68 ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);CHKERRQ(ierr); 69 for (k=0; k < pipefgmres->vv_allocated; k++) { 70 pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; 71 } 72 73 PetscFunctionReturn(0); 74 } 75 76 /* 77 78 KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart. Return residual 79 history if requested. 80 81 input parameters: 82 . pipefgmres - structure containing parameters and work areas 83 84 output parameters: 85 . itcount - number of iterations used. If null, ignored. 86 . converged - 0 if not converged 87 88 Notes: 89 On entry, the value in vector VEC_VV(0) should be 90 the initial residual. 91 92 93 */ 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "KSPPIPEFGMRESCycle" 97 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp) 98 { 99 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 100 PetscReal res_norm; 101 PetscReal hapbnd,tt; 102 PetscScalar *hh,*hes,*lhh,shift = pipefgmres->shift; 103 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 104 PetscErrorCode ierr; 105 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 106 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 107 PetscInt i,j,k; 108 Mat Amat,Pmat; 109 Vec Q,W; /* Pipelining vectors */ 110 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 111 112 PetscFunctionBegin; 113 if (itcount) *itcount = 0; 114 115 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 116 Q = VEC_Q; 117 W = VEC_W; 118 119 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 120 /* Note that we add an extra value here to allow for a single reduction */ 121 if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr); 122 } 123 lhh = pipefgmres->orthogwork; 124 125 /* Number of pseudo iterations since last restart is the number 126 of prestart directions */ 127 loc_it = 0; 128 129 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 130 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 131 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 132 (loc_it -1) is passed, so the two are equivalent */ 133 pipefgmres->it = (loc_it - 1); 134 135 /* initial residual is in VEC_VV(0) - compute its norm*/ 136 ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr); 137 138 /* first entry in right-hand-side of hessenberg system is just 139 the initial residual norm */ 140 *RS(0) = res_norm; 141 142 ksp->rnorm = res_norm; 143 ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); 144 ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); 145 146 /* check for the convergence - maybe the current guess is good enough */ 147 ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 148 if (ksp->reason) { 149 if (itcount) *itcount = 0; 150 PetscFunctionReturn(0); 151 } 152 153 /* scale VEC_VV (the initial residual) */ 154 ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr); 155 156 /* Fill the pipeline */ 157 ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr); 158 ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); 159 ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr); 160 ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */ 161 162 /* MAIN ITERATION LOOP BEGINNING*/ 163 /* keep iterating until we have converged OR generated the max number 164 of directions OR reached the max number of iterations for the method */ 165 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 166 if (loc_it) { 167 ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); 168 ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); 169 } 170 pipefgmres->it = (loc_it - 1); 171 172 /* see if more space is needed for work vectors */ 173 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 174 ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr); 175 /* (loc_it+1) is passed in as number of the first vector that should 176 be allocated */ 177 } 178 179 /* Note that these inner products are with "Z" now, so 180 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 181 not the value from the equivalent FGMRES run (even in exact arithmetic) 182 That is, the H we need for the Arnoldi relation is different from the 183 coefficients we use in the orthogonalization process,because of the shift */ 184 185 /* Do some local twiddling to allow for a single reduction */ 186 for(i=0;i<loc_it+1;i++){ 187 redux[i] = VEC_VV(i); 188 } 189 redux[loc_it+1] = ZVEC(loc_it); 190 191 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 192 ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); 193 194 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 195 ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr); 196 197 /* The work to be overlapped with the inner products follows. 198 This is application of the preconditioner and the operator 199 to compute intermediate quantites which will be combined (locally) 200 with the results of the inner products. 201 */ 202 ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr); 203 ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); 204 ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr); 205 206 /* Compute inner products of the new direction with previous directions, 207 and the norm of the to-be-orthogonalized direction "Z". 208 This information is enough to build the required entries 209 of H. The inner product with VEC_VV(it_loc) is 210 *different* than in the standard FGMRES and need to be dealt with specially. 211 That is, for standard FGMRES the orthogonalization coefficients are the same 212 as the coefficients used in the Arnoldi relation to reconstruct, but here this 213 is not true (albeit only for the one entry of H which we "unshift" below. */ 214 215 /* Finish the dot product, retrieving the extra entry */ 216 ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); 217 tt = PetscRealPart(lhh[loc_it+1]); 218 219 /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt 220 Note that the Hessenberg entries require a shift, as these are for the 221 relation AU = VH, which is wrt unshifted basis vectors */ 222 hh = HH(0,loc_it); hes=HES(0,loc_it); 223 for (j=0; j<loc_it; j++) { 224 hh[j] = lhh[j]; 225 hes[j] = lhh[j]; 226 } 227 hh[loc_it] = lhh[loc_it] + shift; 228 hes[loc_it] = lhh[loc_it] + shift; 229 230 /* we delay applying the shift here */ 231 for (j=0; j<=loc_it; j++) { 232 lhh[j] = -lhh[j]; /* flip sign */ 233 } 234 235 /* Compute the norm of the un-normalized new direction using the rearranged formula 236 Note that these are shifted ("barred") quantities */ 237 for(k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 238 if (tt < 0.0) { 239 /* If we detect square root breakdown in the norm, we must restart the algorithm. 240 Here this means we simply break the current loop and reconstruct the solution 241 using the basis we have computed thus far. Note that by breaking immediately, 242 we do not update the iteration count, so computation done in this iteration 243 should be disregarded. 244 */ 245 ierr = PetscInfo1(ksp,"Restart due to square root breakdown at it = \n",ksp->its);CHKERRQ(ierr); 246 break; 247 } else { 248 tt = PetscSqrtReal(tt); 249 } 250 251 /* new entry in hessenburg is the 2-norm of our new direction */ 252 hh[loc_it+1] = tt; 253 hes[loc_it+1] = tt; 254 255 /* The recurred computation for the new direction 256 The division by tt is delayed to the happy breakdown check later 257 Note placement BEFORE the unshift 258 */ 259 ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr); 260 ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr); 261 /* (VEC_VV(loc_it+1) is not normalized yet) */ 262 263 /* The recurred computation for the preconditioned vector (u) */ 264 ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr); 265 ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr); 266 ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); 267 268 /* Unshift an entry in the GS coefficients ("removing the bar") */ 269 lhh[loc_it] -= shift; 270 271 /* The recurred computation for z (Au) 272 Note placement AFTER the "unshift" */ 273 ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr); 274 ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr); 275 ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); 276 277 /* Happy Breakdown Check */ 278 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 279 /* RS(loc_it) contains the res_norm from the last iteration */ 280 hapbnd = PetscMin(pipefgmres->haptol,hapbnd); 281 if (tt > hapbnd) { 282 /* scale new direction by its norm */ 283 ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr); 284 } else { 285 /* This happens when the solution is exactly reached. */ 286 /* So there is no new direction... */ 287 ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); /* set VEC_TEMP to 0 */ 288 hapend = PETSC_TRUE; 289 } 290 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 291 current solution would not be exact if HES was singular. Note that 292 HH non-singular implies that HES is not singular, and HES is guaranteed 293 to be nonsingular when PREVECS are linearly independent and A is 294 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 295 of HES). So we should really add a check to verify that HES is nonsingular.*/ 296 297 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 298 here to check that the hessenberg matrix is indeed non-singular (since 299 FGMRES does not guarantee this) */ 300 301 /* Now apply rotations to new col of hessenberg (and right side of system), 302 calculate new rotation, and get new residual norm at the same time*/ 303 ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr); 304 if (ksp->reason) break; 305 306 loc_it++; 307 pipefgmres->it = (loc_it-1); /* Add this here in case it has converged */ 308 309 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 310 ksp->its++; 311 ksp->rnorm = res_norm; 312 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 313 314 ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 315 316 /* Catch error in happy breakdown and signal convergence and break from loop */ 317 if (hapend) { 318 if (!ksp->reason) { 319 if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm); 320 else { 321 ksp->reason = KSP_DIVERGED_BREAKDOWN; 322 break; 323 } 324 } 325 } 326 } 327 /* END OF ITERATION LOOP */ 328 ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); 329 330 /* 331 Monitor if we know that we will not return for a restart */ 332 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { 333 ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); 334 } 335 336 if (itcount) *itcount = loc_it; 337 338 /* 339 Down here we have to solve for the "best" coefficients of the Krylov 340 columns, add the solution values together, and possibly unwind the 341 preconditioning from the solution 342 */ 343 344 /* Form the solution (or the solution so far) */ 345 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln 346 properly navigates */ 347 348 ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr); 349 350 PetscFunctionReturn(0); 351 } 352 353 /* 354 KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method. 355 356 357 Input Parameter: 358 . ksp - the Krylov space object that was set to use pipefgmres 359 360 Output Parameter: 361 . outits - number of iterations used 362 363 */ 364 #undef __FUNCT__ 365 #define __FUNCT__ "KSPSolve_PIPEFGMRES" 366 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) 367 { 368 PetscErrorCode ierr; 369 PetscInt its,itcount; 370 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 371 PetscBool guess_zero = ksp->guess_zero; 372 373 PetscFunctionBegin; 374 375 /* We have not checked these routines for use with complex numbers. The inner products 376 are likely not defined correctly for that case */ 377 #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX)) 378 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars"); 379 #endif 380 381 ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr); 382 383 if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 384 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 385 ksp->its = 0; 386 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 387 388 itcount = 0; 389 ksp->reason = KSP_CONVERGED_ITERATING; 390 while (!ksp->reason) { 391 ierr = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr); 392 ierr = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr); 393 itcount += its; 394 if (itcount >= ksp->max_it) { 395 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 396 break; 397 } 398 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 399 } 400 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 401 PetscFunctionReturn(0); 402 } 403 404 #undef __FUNCT__ 405 #define __FUNCT__ "KSPDestroy_PIPEFGMRES" 406 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) 407 { 408 PetscErrorCode ierr; 409 410 PetscFunctionBegin; 411 ierr = KSPReset_PIPEFGMRES(ksp);CHKERRQ(ierr); 412 ierr = KSPDestroy_GMRES(ksp);CHKERRQ(ierr); 413 PetscFunctionReturn(0); 414 } 415 416 /* 417 KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the 418 current iterates. 419 420 Input parameters: 421 nrs - work area of size it + 1. 422 vguess - index of initial guess 423 vdest - index of result. Note that vguess may == vdest (replace 424 guess with the solution). 425 it - HH upper triangular part is a block of size (it+1) x (it+1) 426 427 This is an internal routine that knows about the PIPEFGMRES internals. 428 */ 429 #undef __FUNCT__ 430 #define __FUNCT__ "KSPPIPEFGMRESBuildSoln" 431 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it) 432 { 433 PetscScalar tt; 434 PetscErrorCode ierr; 435 PetscInt k,j; 436 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 437 438 PetscFunctionBegin; 439 /* Solve for solution vector that minimizes the residual */ 440 441 if (it < 0) { /* no pipefgmres steps have been performed */ 442 ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */ 443 PetscFunctionReturn(0); 444 } 445 446 /* solve the upper triangular system - RS is the right side and HH is 447 the upper triangular matrix - put soln in nrs */ 448 if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it); 449 else nrs[it] = 0.0; 450 451 for (k=it-1; k>=0; k--) { 452 tt = *RS(k); 453 for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j]; 454 nrs[k] = tt / *HH(k,k); 455 } 456 457 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 458 ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr); 459 ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr); 460 461 /* add solution to previous solution */ 462 if (vdest == vguess) { 463 ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr); 464 } else { 465 ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr); 466 } 467 PetscFunctionReturn(0); 468 } 469 470 /* 471 472 KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization. 473 Return new residual. 474 475 input parameters: 476 477 . ksp - Krylov space object 478 . it - plane rotations are applied to the (it+1)th column of the 479 modified hessenberg (i.e. HH(:,it)) 480 . hapend - PETSC_FALSE not happy breakdown ending. 481 482 output parameters: 483 . res - the new residual 484 485 */ 486 #undef __FUNCT__ 487 #define __FUNCT__ "KSPPIPEFGMRESUpdateHessenberg" 488 /* 489 . it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this 490 */ 491 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res) 492 { 493 PetscScalar *hh,*cc,*ss,*rs; 494 PetscInt j; 495 PetscReal hapbnd; 496 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 497 PetscErrorCode ierr; 498 499 PetscFunctionBegin; 500 hh = HH(0,it); /* pointer to beginning of column to update */ 501 cc = CC(0); /* beginning of cosine rotations */ 502 ss = SS(0); /* beginning of sine rotations */ 503 rs = RS(0); /* right hand side of least squares system */ 504 505 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 506 for (j=0; j<=it+1; j++) *HES(j,it) = hh[j]; 507 508 /* check for the happy breakdown */ 509 hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol); 510 if (PetscAbsScalar(hh[it+1]) < hapbnd) { 511 ierr = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr); 512 *hapend = PETSC_TRUE; 513 } 514 515 /* Apply all the previously computed plane rotations to the new column 516 of the Hessenberg matrix */ 517 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 518 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 519 520 for (j=0; j<it; j++) { 521 PetscScalar hhj = hh[j]; 522 hh[j] = PetscConj(cc[j])*hhj + ss[j]*hh[j+1]; 523 hh[j+1] = -ss[j] *hhj + cc[j]*hh[j+1]; 524 } 525 526 /* 527 compute the new plane rotation, and apply it to: 528 1) the right-hand-side of the Hessenberg system (RS) 529 note: it affects RS(it) and RS(it+1) 530 2) the new column of the Hessenberg matrix 531 note: it affects HH(it,it) which is currently pointed to 532 by hh and HH(it+1, it) (*(hh+1)) 533 thus obtaining the updated value of the residual... 534 */ 535 536 /* compute new plane rotation */ 537 538 if (!*hapend) { 539 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1]))); 540 if (delta == 0.0) { 541 ksp->reason = KSP_DIVERGED_NULL; 542 PetscFunctionReturn(0); 543 } 544 545 cc[it] = hh[it] / delta; /* new cosine value */ 546 ss[it] = hh[it+1] / delta; /* new sine value */ 547 548 hh[it] = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1]; 549 rs[it+1] = -ss[it]*rs[it]; 550 rs[it] = PetscConj(cc[it])*rs[it]; 551 *res = PetscAbsScalar(rs[it+1]); 552 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 553 another rotation matrix (so RH doesn't change). The new residual is 554 always the new sine term times the residual from last time (RS(it)), 555 but now the new sine rotation would be zero...so the residual should 556 be zero...so we will multiply "zero" by the last residual. This might 557 not be exactly what we want to do here -could just return "zero". */ 558 559 *res = 0.0; 560 } 561 PetscFunctionReturn(0); 562 } 563 564 /* 565 KSPBuildSolution_PIPEFGMRES 566 567 Input Parameter: 568 . ksp - the Krylov space object 569 . ptr- 570 571 Output Parameter: 572 . result - the solution 573 574 Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle 575 calls directly. 576 577 */ 578 #undef __FUNCT__ 579 #define __FUNCT__ "KSPBuildSolution_PIPEFGMRES" 580 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result) 581 { 582 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 583 PetscErrorCode ierr; 584 585 PetscFunctionBegin; 586 if (!ptr) { 587 if (!pipefgmres->sol_temp) { 588 ierr = VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);CHKERRQ(ierr); 589 ierr = PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);CHKERRQ(ierr); 590 } 591 ptr = pipefgmres->sol_temp; 592 } 593 if (!pipefgmres->nrs) { 594 /* allocate the work area */ 595 ierr = PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);CHKERRQ(ierr); 596 ierr = PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));CHKERRQ(ierr); 597 } 598 599 ierr = KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);CHKERRQ(ierr); 600 if (result) *result = ptr; 601 PetscFunctionReturn(0); 602 } 603 604 #undef __FUNCT__ 605 #define __FUNCT__ "KSPSetFromOptions_PIPEFGMRES" 606 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp) 607 { 608 PetscErrorCode ierr; 609 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 610 PetscBool flg; 611 PetscScalar shift; 612 613 PetscFunctionBegin; 614 ierr = KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);CHKERRQ(ierr); 615 ierr = PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");CHKERRQ(ierr); 616 ierr = PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);CHKERRQ(ierr); 617 if (flg) { ierr = KSPPIPEFGMRESSetShift(ksp,shift);CHKERRQ(ierr); } 618 ierr = PetscOptionsTail();CHKERRQ(ierr); 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "KSPView_PIPEFGMRES" 624 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer) 625 { 626 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 627 PetscErrorCode ierr; 628 PetscBool iascii,isstring; 629 630 PetscFunctionBegin; 631 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 632 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 633 634 if (iascii) { 635 ierr = PetscViewerASCIIPrintf(viewer," GMRES: restart=%D\n",pipefgmres->max_k);CHKERRQ(ierr); 636 ierr = PetscViewerASCIIPrintf(viewer," GMRES: happy breakdown tolerance %g\n",(double)pipefgmres->haptol);CHKERRQ(ierr); 637 #if defined(PETSC_USE_COMPLEX) 638 ierr = PetscViewerASCIIPrintf(viewer," PIPEFGMRES: shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr); 639 #else 640 ierr = PetscViewerASCIIPrintf(viewer," PIPEFGMRES: shift=%g\n",pipefgmres->shift);CHKERRQ(ierr); 641 #endif 642 } else if (isstring) { 643 ierr = PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);CHKERRQ(ierr); 644 #if defined(PETSC_USE_COMPLEX) 645 ierr = PetscViewerStringSPrintf(viewer," PIPEFGMRES: shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr); 646 #else 647 ierr = PetscViewerStringSPrintf(viewer," PIPEFGMRES: shift=%g\n",pipefgmres->shift);CHKERRQ(ierr); 648 #endif 649 } 650 PetscFunctionReturn(0); 651 } 652 653 #undef __FUNCT__ 654 #define __FUNCT__ "KSPReset_PIPEFGMRES" 655 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) 656 { 657 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 658 PetscErrorCode ierr; 659 PetscInt i; 660 661 PetscFunctionBegin; 662 ierr = PetscFree(pipefgmres->prevecs);CHKERRQ(ierr); 663 ierr = PetscFree(pipefgmres->zvecs);CHKERRQ(ierr); 664 for (i=0; i<pipefgmres->nwork_alloc; i++) { 665 ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);CHKERRQ(ierr); 666 ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);CHKERRQ(ierr); 667 } 668 ierr = PetscFree(pipefgmres->prevecs_user_work);CHKERRQ(ierr); 669 ierr = PetscFree(pipefgmres->zvecs_user_work);CHKERRQ(ierr); 670 ierr = PetscFree(pipefgmres->redux);CHKERRQ(ierr); 671 ierr = KSPReset_GMRES(ksp);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 /*MC 676 KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method. 677 678 A flexible, 1-stage pipelined variant of GMRES. 679 680 Options Database Keys: 681 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 682 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 683 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of 684 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift() 685 vectors are allocated as needed) 686 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 687 688 689 Level: intermediate 690 691 Notes: 692 693 This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter. 694 695 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 696 697 Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES). 698 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 699 See the FAQ on the PETSc website for details. 700 701 Developer Notes: This class is subclassed off of KSPGMRES. 702 703 Reference: 704 P. Sanan, S.M. Schnepp, and D.A. May, 705 "Pipelined, Flexible Krylov Subspace Methods," 706 SIAM Journal on Scientific Computing 2016 38:5, C441-C470, 707 DOI: 10.1137/15M1049130 708 709 .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES 710 KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEFGMRESSetShift() 711 M*/ 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "KSPCreate_PIPEFGMRES" 715 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) 716 { 717 KSP_PIPEFGMRES *pipefgmres; 718 PetscErrorCode ierr; 719 720 PetscFunctionBegin; 721 ierr = PetscNewLog(ksp,&pipefgmres);CHKERRQ(ierr); 722 723 ksp->data = (void*)pipefgmres; 724 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 725 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 726 ksp->ops->solve = KSPSolve_PIPEFGMRES; 727 ksp->ops->reset = KSPReset_PIPEFGMRES; 728 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 729 ksp->ops->view = KSPView_PIPEFGMRES; 730 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 731 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 732 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 733 734 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);CHKERRQ(ierr); 735 736 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);CHKERRQ(ierr); 737 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);CHKERRQ(ierr); 738 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);CHKERRQ(ierr); 739 740 pipefgmres->nextra_vecs = 1; 741 pipefgmres->haptol = 1.0e-30; 742 pipefgmres->q_preallocate = 0; 743 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 744 pipefgmres->orthog = 0; 745 pipefgmres->nrs = 0; 746 pipefgmres->sol_temp = 0; 747 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 748 pipefgmres->Rsvd = 0; 749 pipefgmres->orthogwork = 0; 750 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 751 pipefgmres->shift = 1.0; 752 PetscFunctionReturn(0); 753 } 754 755 #undef __FUNCT__ 756 #define __FUNCT__ "KSPPIPEFGMRESGetNewVectors" 757 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it) 758 { 759 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 760 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 761 PetscInt nalloc; /* number to allocate */ 762 PetscErrorCode ierr; 763 PetscInt k; 764 765 PetscFunctionBegin; 766 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 767 in a single chunk */ 768 769 /* Adjust the number to allocate to make sure that we don't exceed the 770 number of available slots (pipefgmres->vecs_allocated)*/ 771 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) { 772 nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; 773 } 774 if (!nalloc) PetscFunctionReturn(0); 775 776 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 777 778 /* work vectors */ 779 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);CHKERRQ(ierr); 780 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);CHKERRQ(ierr); 781 for (k=0; k < nalloc; k++) { 782 pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k]; 783 } 784 /* specify size of chunk allocated */ 785 pipefgmres->mwork_alloc[nwork] = nalloc; 786 787 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 788 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);CHKERRQ(ierr); 789 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);CHKERRQ(ierr); 790 for (k=0; k < nalloc; k++) { 791 pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k]; 792 } 793 794 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);CHKERRQ(ierr); 795 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);CHKERRQ(ierr); 796 for (k=0; k < nalloc; k++) { 797 pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k]; 798 } 799 800 /* increment the number of work vector chunks */ 801 pipefgmres->nwork_alloc++; 802 PetscFunctionReturn(0); 803 } 804 #undef __FUNCT__ 805 #define __FUNCT__ "KSPPIPEFGMRESSetShift" 806 /*@ 807 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver. 808 809 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there). 810 811 Logically Collective on KSP 812 813 Input Parameters: 814 + ksp - the Krylov space context 815 - shift - the shift 816 817 Level: intermediate 818 819 Options Database: 820 . -ksp_pipefgmres_shift <shift> 821 822 .seealso: KSPComputeEigenvalues() 823 @*/ 824 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift) 825 { 826 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 827 828 PetscFunctionBegin; 829 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1); 830 PetscValidLogicalCollectiveScalar(ksp,shift,2); 831 pipefgmres->shift = shift; 832 PetscFunctionReturn(0); 833 } 834