1 /* 2 Contributed by Patrick Sanan and Sascha M. Schnepp 3 */ 4 5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I "petscksp.h" I*/ 6 7 #define PIPEFGMRES_DELTA_DIRECTIONS 10 8 #define PIPEFGMRES_DEFAULT_MAXK 30 9 10 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt); 11 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*); 12 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt); 13 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP); 14 15 /* 16 17 KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres. 18 19 This is called once, usually automatically by KSPSolve() or KSPSetUp(), 20 but can be called directly by KSPSetUp(). 21 22 */ 23 #undef __FUNCT__ 24 #define __FUNCT__ "KSPSetUp_PIPEFGMRES" 25 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) 26 { 27 PetscErrorCode ierr; 28 PetscInt k; 29 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 30 const PetscInt max_k = pipefgmres->max_k; 31 32 PetscFunctionBegin; 33 ierr = KSPSetUp_GMRES(ksp);CHKERRQ(ierr); 34 35 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);CHKERRQ(ierr); 36 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);CHKERRQ(ierr); 37 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr); 38 39 ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);CHKERRQ(ierr); 40 ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);CHKERRQ(ierr); 41 for (k=0; k < pipefgmres->vv_allocated; k++) { 42 pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; 43 } 44 45 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);CHKERRQ(ierr); 46 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);CHKERRQ(ierr); 47 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr); 48 49 ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);CHKERRQ(ierr); 50 ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));CHKERRQ(ierr); 51 52 ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);CHKERRQ(ierr); 53 ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);CHKERRQ(ierr); 54 for (k=0; k < pipefgmres->vv_allocated; k++) { 55 pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; 56 } 57 58 PetscFunctionReturn(0); 59 } 60 61 /* 62 63 KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart. Return residual 64 history if requested. 65 66 input parameters: 67 . pipefgmres - structure containing parameters and work areas 68 69 output parameters: 70 . itcount - number of iterations used. If null, ignored. 71 . converged - 0 if not converged 72 73 Notes: 74 On entry, the value in vector VEC_VV(0) should be 75 the initial residual. 76 77 78 */ 79 80 #undef __FUNCT__ 81 #define __FUNCT__ "KSPPIPEFGMRESCycle" 82 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp) 83 { 84 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 85 PetscReal res_norm; 86 PetscReal hapbnd,tt; 87 PetscScalar *hh,*hes,*lhh,shift = pipefgmres->shift; 88 PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ 89 PetscErrorCode ierr; 90 PetscInt loc_it; /* local count of # of dir. in Krylov space */ 91 PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ 92 PetscInt i,j,k; 93 Mat Amat,Pmat; 94 Vec Q,W; /* Pipelining vectors */ 95 Vec *redux = pipefgmres->redux; /* workspace for single reduction */ 96 97 PetscFunctionBegin; 98 99 /* We have not checked these routines for use with complex numbers. The inner products 100 are likely not defined correctly for that case */ 101 #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX)) 102 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars"); 103 #endif 104 105 if (itcount) *itcount = 0; 106 107 /* Assign simpler names to these vectors, allocated as pipelining workspace */ 108 Q = VEC_Q; 109 W = VEC_W; 110 111 /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ 112 /* Note that we add an extra value here to allow for a single reduction */ 113 if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr); 114 } 115 lhh = pipefgmres->orthogwork; 116 117 /* Number of pseudo iterations since last restart is the number 118 of prestart directions */ 119 loc_it = 0; 120 121 /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in 122 KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. 123 Note that when KSPPIPEFGMRESBuildSoln is called from this function, 124 (loc_it -1) is passed, so the two are equivalent */ 125 pipefgmres->it = (loc_it - 1); 126 127 /* initial residual is in VEC_VV(0) - compute its norm*/ 128 ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr); 129 130 /* first entry in right-hand-side of hessenberg system is just 131 the initial residual norm */ 132 *RS(0) = res_norm; 133 134 ksp->rnorm = res_norm; 135 ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); 136 ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); 137 138 /* check for the convergence - maybe the current guess is good enough */ 139 ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 140 if (ksp->reason) { 141 if (itcount) *itcount = 0; 142 PetscFunctionReturn(0); 143 } 144 145 /* scale VEC_VV (the initial residual) */ 146 ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr); 147 148 /* Fill the pipeline */ 149 ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr); 150 ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); 151 ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr); 152 ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */ 153 154 /* MAIN ITERATION LOOP BEGINNING*/ 155 /* keep iterating until we have converged OR generated the max number 156 of directions OR reached the max number of iterations for the method */ 157 while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { 158 if (loc_it) { 159 ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); 160 ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); 161 } 162 pipefgmres->it = (loc_it - 1); 163 164 /* see if more space is needed for work vectors */ 165 if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { 166 ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr); 167 /* (loc_it+1) is passed in as number of the first vector that should 168 be allocated */ 169 } 170 171 /* Note that these inner products are with "Z" now, so 172 in particular, lhh[loc_it] is the 'barred' or 'shifted' value, 173 not the value from the equivalent FGMRES run (even in exact arithmetic) 174 That is, the H we need for the Arnoldi relation is different from the 175 coefficients we use in the orthogonalization process,because of the shift */ 176 177 /* Do some local twiddling to allow for a single reduction */ 178 for(i=0;i<loc_it+1;i++){ 179 redux[i] = VEC_VV(i); 180 } 181 redux[loc_it+1] = ZVEC(loc_it); 182 183 /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ 184 ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); 185 186 /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ 187 ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr); 188 189 /* The work to be overlapped with the inner products follows. 190 This is application of the preconditioner and the operator 191 to compute intermediate quantites which will be combined (locally) 192 with the results of the inner products. 193 */ 194 ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr); 195 ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); 196 ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr); 197 198 /* Compute inner products of the new direction with previous directions, 199 and the norm of the to-be-orthogonalized direction "Z". 200 This information is enough to build the required entries 201 of H. The inner product with VEC_VV(it_loc) is 202 *different* than in the standard FGMRES and need to be dealt with specially. 203 That is, for standard FGMRES the orthogonalization coefficients are the same 204 as the coefficients used in the Arnoldi relation to reconstruct, but here this 205 is not true (albeit only for the one entry of H which we "unshift" below. */ 206 207 /* Finish the dot product, retrieving the extra entry */ 208 ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); 209 tt = PetscRealPart(lhh[loc_it+1]); 210 211 /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt 212 Note that the Hessenberg entries require a shift, as these are for the 213 relation AU = VH, which is wrt unshifted basis vectors */ 214 hh = HH(0,loc_it); hes=HES(0,loc_it); 215 for (j=0; j<loc_it; j++) { 216 hh[j] = lhh[j]; 217 hes[j] = lhh[j]; 218 } 219 hh[loc_it] = lhh[loc_it] + shift; 220 hes[loc_it] = lhh[loc_it] + shift; 221 222 /* we delay applying the shift here */ 223 for (j=0; j<=loc_it; j++) { 224 lhh[j] = -lhh[j]; /* flip sign */ 225 } 226 227 /* Compute the norm of the un-normalized new direction using the rearranged formula 228 Note that these are shifted ("barred") quantities */ 229 for(k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); 230 if (tt < 0.0) { 231 /* If we detect square root breakdown in the norm, we must restart the algorithm. 232 Here this means we simply break the current loop and reconstruct the solution 233 using the basis we have computed thus far. Note that by breaking immediately, 234 we do not update the iteration count, so computation done in this iteration 235 should be disregarded. 236 */ 237 ierr = PetscInfo1(ksp,"Restart due to square root breakdown at it = \n",ksp->its);CHKERRQ(ierr); 238 break; 239 } else { 240 tt = PetscSqrtReal(tt); 241 } 242 243 /* new entry in hessenburg is the 2-norm of our new direction */ 244 hh[loc_it+1] = tt; 245 hes[loc_it+1] = tt; 246 247 /* The recurred computation for the new direction 248 The division by tt is delayed to the happy breakdown check later 249 Note placement BEFORE the unshift 250 */ 251 ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr); 252 ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr); 253 /* (VEC_VV(loc_it+1) is not normalized yet) */ 254 255 /* The recurred computation for the preconditioned vector (u) */ 256 ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr); 257 ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr); 258 ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); 259 260 /* Unshift an entry in the GS coefficients ("removing the bar") */ 261 lhh[loc_it] -= shift; 262 263 /* The recurred computation for z (Au) 264 Note placement AFTER the "unshift" */ 265 ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr); 266 ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr); 267 ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); 268 269 /* Happy Breakdown Check */ 270 hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); 271 /* RS(loc_it) contains the res_norm from the last iteration */ 272 hapbnd = PetscMin(pipefgmres->haptol,hapbnd); 273 if (tt > hapbnd) { 274 /* scale new direction by its norm */ 275 ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr); 276 } else { 277 /* This happens when the solution is exactly reached. */ 278 /* So there is no new direction... */ 279 ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); /* set VEC_TEMP to 0 */ 280 hapend = PETSC_TRUE; 281 } 282 /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the 283 current solution would not be exact if HES was singular. Note that 284 HH non-singular implies that HES is not singular, and HES is guaranteed 285 to be nonsingular when PREVECS are linearly independent and A is 286 nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity 287 of HES). So we should really add a check to verify that HES is nonsingular.*/ 288 289 /* Note that to be thorough, in debug mode, one could call a LAPACK routine 290 here to check that the hessenberg matrix is indeed non-singular (since 291 FGMRES does not guarantee this) */ 292 293 /* Now apply rotations to new col of hessenberg (and right side of system), 294 calculate new rotation, and get new residual norm at the same time*/ 295 ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr); 296 if (ksp->reason) break; 297 298 loc_it++; 299 pipefgmres->it = (loc_it-1); /* Add this here in case it has converged */ 300 301 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 302 ksp->its++; 303 ksp->rnorm = res_norm; 304 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 305 306 ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); 307 308 /* Catch error in happy breakdown and signal convergence and break from loop */ 309 if (hapend) { 310 if (!ksp->reason) { 311 if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm); 312 else { 313 ksp->reason = KSP_DIVERGED_BREAKDOWN; 314 break; 315 } 316 } 317 } 318 } 319 /* END OF ITERATION LOOP */ 320 ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); 321 322 /* 323 Monitor if we know that we will not return for a restart */ 324 if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { 325 ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); 326 } 327 328 if (itcount) *itcount = loc_it; 329 330 /* 331 Down here we have to solve for the "best" coefficients of the Krylov 332 columns, add the solution values together, and possibly unwind the 333 preconditioning from the solution 334 */ 335 336 /* Form the solution (or the solution so far) */ 337 /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln 338 properly navigates */ 339 340 ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr); 341 342 PetscFunctionReturn(0); 343 } 344 345 /* 346 KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method. 347 348 349 Input Parameter: 350 . ksp - the Krylov space object that was set to use pipefgmres 351 352 Output Parameter: 353 . outits - number of iterations used 354 355 */ 356 #undef __FUNCT__ 357 #define __FUNCT__ "KSPSolve_PIPEFGMRES" 358 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) 359 { 360 PetscErrorCode ierr; 361 PetscInt its,itcount; 362 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 363 PetscBool guess_zero = ksp->guess_zero; 364 365 PetscFunctionBegin; 366 if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called"); 367 ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); 368 ksp->its = 0; 369 ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); 370 371 itcount = 0; 372 ksp->reason = KSP_CONVERGED_ITERATING; 373 while (!ksp->reason) { 374 ierr = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr); 375 ierr = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr); 376 itcount += its; 377 if (itcount >= ksp->max_it) { 378 if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; 379 break; 380 } 381 ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */ 382 } 383 ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */ 384 PetscFunctionReturn(0); 385 } 386 387 #undef __FUNCT__ 388 #define __FUNCT__ "KSPDestroy_PIPEFGMRES" 389 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) 390 { 391 PetscErrorCode ierr; 392 393 PetscFunctionBegin; 394 ierr = KSPReset_PIPEFGMRES(ksp);CHKERRQ(ierr); 395 ierr = KSPDestroy_GMRES(ksp);CHKERRQ(ierr); 396 PetscFunctionReturn(0); 397 } 398 399 /* 400 KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the 401 current iterates. 402 403 Input parameters: 404 nrs - work area of size it + 1. 405 vguess - index of initial guess 406 vdest - index of result. Note that vguess may == vdest (replace 407 guess with the solution). 408 it - HH upper triangular part is a block of size (it+1) x (it+1) 409 410 This is an internal routine that knows about the PIPEFGMRES internals. 411 */ 412 #undef __FUNCT__ 413 #define __FUNCT__ "KSPPIPEFGMRESBuildSoln" 414 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it) 415 { 416 PetscScalar tt; 417 PetscErrorCode ierr; 418 PetscInt k,j; 419 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 420 421 PetscFunctionBegin; 422 /* Solve for solution vector that minimizes the residual */ 423 424 if (it < 0) { /* no pipefgmres steps have been performed */ 425 ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */ 426 PetscFunctionReturn(0); 427 } 428 429 /* solve the upper triangular system - RS is the right side and HH is 430 the upper triangular matrix - put soln in nrs */ 431 if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it); 432 else nrs[it] = 0.0; 433 434 for (k=it-1; k>=0; k--) { 435 tt = *RS(k); 436 for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j]; 437 nrs[k] = tt / *HH(k,k); 438 } 439 440 /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */ 441 ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr); 442 ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr); 443 444 /* add solution to previous solution */ 445 if (vdest == vguess) { 446 ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr); 447 } else { 448 ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr); 449 } 450 PetscFunctionReturn(0); 451 } 452 453 /* 454 455 KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization. 456 Return new residual. 457 458 input parameters: 459 460 . ksp - Krylov space object 461 . it - plane rotations are applied to the (it+1)th column of the 462 modified hessenberg (i.e. HH(:,it)) 463 . hapend - PETSC_FALSE not happy breakdown ending. 464 465 output parameters: 466 . res - the new residual 467 468 */ 469 #undef __FUNCT__ 470 #define __FUNCT__ "KSPPIPEFGMRESUpdateHessenberg" 471 /* 472 . it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this 473 */ 474 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res) 475 { 476 PetscScalar *hh,*cc,*ss,*rs; 477 PetscInt j; 478 PetscReal hapbnd; 479 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); 480 PetscErrorCode ierr; 481 482 PetscFunctionBegin; 483 hh = HH(0,it); /* pointer to beginning of column to update */ 484 cc = CC(0); /* beginning of cosine rotations */ 485 ss = SS(0); /* beginning of sine rotations */ 486 rs = RS(0); /* right hand side of least squares system */ 487 488 /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */ 489 for (j=0; j<=it+1; j++) *HES(j,it) = hh[j]; 490 491 /* check for the happy breakdown */ 492 hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol); 493 if (PetscAbsScalar(hh[it+1]) < hapbnd) { 494 ierr = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr); 495 *hapend = PETSC_TRUE; 496 } 497 498 /* Apply all the previously computed plane rotations to the new column 499 of the Hessenberg matrix */ 500 /* Note: this uses the rotation [conj(c) s ; -s c], c= cos(theta), s= sin(theta), 501 and some refs have [c s ; -conj(s) c] (don't be confused!) */ 502 503 for (j=0; j<it; j++) { 504 PetscScalar hhj = hh[j]; 505 hh[j] = PetscConj(cc[j])*hhj + ss[j]*hh[j+1]; 506 hh[j+1] = -ss[j] *hhj + cc[j]*hh[j+1]; 507 } 508 509 /* 510 compute the new plane rotation, and apply it to: 511 1) the right-hand-side of the Hessenberg system (RS) 512 note: it affects RS(it) and RS(it+1) 513 2) the new column of the Hessenberg matrix 514 note: it affects HH(it,it) which is currently pointed to 515 by hh and HH(it+1, it) (*(hh+1)) 516 thus obtaining the updated value of the residual... 517 */ 518 519 /* compute new plane rotation */ 520 521 if (!*hapend) { 522 PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1]))); 523 if (delta == 0.0) { 524 ksp->reason = KSP_DIVERGED_NULL; 525 PetscFunctionReturn(0); 526 } 527 528 cc[it] = hh[it] / delta; /* new cosine value */ 529 ss[it] = hh[it+1] / delta; /* new sine value */ 530 531 hh[it] = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1]; 532 rs[it+1] = -ss[it]*rs[it]; 533 rs[it] = PetscConj(cc[it])*rs[it]; 534 *res = PetscAbsScalar(rs[it+1]); 535 } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply 536 another rotation matrix (so RH doesn't change). The new residual is 537 always the new sine term times the residual from last time (RS(it)), 538 but now the new sine rotation would be zero...so the residual should 539 be zero...so we will multiply "zero" by the last residual. This might 540 not be exactly what we want to do here -could just return "zero". */ 541 542 *res = 0.0; 543 } 544 PetscFunctionReturn(0); 545 } 546 547 /* 548 KSPBuildSolution_PIPEFGMRES 549 550 Input Parameter: 551 . ksp - the Krylov space object 552 . ptr- 553 554 Output Parameter: 555 . result - the solution 556 557 Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle 558 calls directly. 559 560 */ 561 #undef __FUNCT__ 562 #define __FUNCT__ "KSPBuildSolution_PIPEFGMRES" 563 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result) 564 { 565 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 566 PetscErrorCode ierr; 567 568 PetscFunctionBegin; 569 if (!ptr) { 570 if (!pipefgmres->sol_temp) { 571 ierr = VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);CHKERRQ(ierr); 572 ierr = PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);CHKERRQ(ierr); 573 } 574 ptr = pipefgmres->sol_temp; 575 } 576 if (!pipefgmres->nrs) { 577 /* allocate the work area */ 578 ierr = PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);CHKERRQ(ierr); 579 ierr = PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));CHKERRQ(ierr); 580 } 581 582 ierr = KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);CHKERRQ(ierr); 583 if (result) *result = ptr; 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "KSPSetFromOptions_PIPEFGMRES" 589 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp) 590 { 591 PetscErrorCode ierr; 592 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 593 PetscBool flg; 594 PetscScalar shift; 595 596 PetscFunctionBegin; 597 ierr = KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);CHKERRQ(ierr); 598 ierr = PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");CHKERRQ(ierr); 599 ierr = PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);CHKERRQ(ierr); 600 if (flg) { ierr = KSPPIPEFGMRESSetShift(ksp,shift);CHKERRQ(ierr); } 601 ierr = PetscOptionsTail();CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 605 #undef __FUNCT__ 606 #define __FUNCT__ "KSPView_PIPEFGMRES" 607 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer) 608 { 609 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 610 PetscErrorCode ierr; 611 PetscBool iascii,isstring; 612 613 PetscFunctionBegin; 614 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 615 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr); 616 617 if (iascii) { 618 ierr = PetscViewerASCIIPrintf(viewer," GMRES: restart=%D\n",pipefgmres->max_k);CHKERRQ(ierr); 619 ierr = PetscViewerASCIIPrintf(viewer," GMRES: happy breakdown tolerance %g\n",(double)pipefgmres->haptol);CHKERRQ(ierr); 620 #if defined(PETSC_USE_COMPLEX) 621 ierr = PetscViewerASCIIPrintf(viewer," PIPEFGMRES: shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr); 622 #else 623 ierr = PetscViewerASCIIPrintf(viewer," PIPEFGMRES: shift=%g\n",pipefgmres->shift);CHKERRQ(ierr); 624 #endif 625 } else if (isstring) { 626 ierr = PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);CHKERRQ(ierr); 627 #if defined(PETSC_USE_COMPLEX) 628 ierr = PetscViewerStringSPrintf(viewer," PIPEFGMRES: shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr); 629 #else 630 ierr = PetscViewerStringSPrintf(viewer," PIPEFGMRES: shift=%g\n",pipefgmres->shift);CHKERRQ(ierr); 631 #endif 632 } 633 PetscFunctionReturn(0); 634 } 635 636 #undef __FUNCT__ 637 #define __FUNCT__ "KSPReset_PIPEFGMRES" 638 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) 639 { 640 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 641 PetscErrorCode ierr; 642 PetscInt i; 643 644 PetscFunctionBegin; 645 ierr = PetscFree(pipefgmres->prevecs);CHKERRQ(ierr); 646 ierr = PetscFree(pipefgmres->zvecs);CHKERRQ(ierr); 647 for (i=0; i<pipefgmres->nwork_alloc; i++) { 648 ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);CHKERRQ(ierr); 649 ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);CHKERRQ(ierr); 650 } 651 ierr = PetscFree(pipefgmres->prevecs_user_work);CHKERRQ(ierr); 652 ierr = PetscFree(pipefgmres->zvecs_user_work);CHKERRQ(ierr); 653 ierr = PetscFree(pipefgmres->redux);CHKERRQ(ierr); 654 ierr = KSPReset_GMRES(ksp);CHKERRQ(ierr); 655 PetscFunctionReturn(0); 656 } 657 658 /*MC 659 KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method. 660 661 A Flexible, 1-stage pipelined variant of GMRES 662 663 This variant is not "explicitly normalized" like PGMRES, and requires a shift parameter. 664 665 A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator. 666 667 Only right preconditioning is supported (but this preconditioner may be nonlinear, as with FGMRES) 668 669 Options Database Keys: 670 + -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against 671 . -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence) 672 . -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of 673 . -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift() 674 vectors are allocated as needed) 675 - -ksp_gmres_krylov_monitor - plot the Krylov space generated 676 677 678 Level: beginner 679 680 Notes: 681 MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods. 682 See the FAQ on the PETSc website for details. 683 684 Developer Notes: This object is subclassed off of KSPGMRES 685 686 .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES 687 KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEGMRESSetShift() 688 M*/ 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "KSPCreate_PIPEFGMRES" 692 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) 693 { 694 KSP_PIPEFGMRES *pipefgmres; 695 PetscErrorCode ierr; 696 697 PetscFunctionBegin; 698 ierr = PetscNewLog(ksp,&pipefgmres);CHKERRQ(ierr); 699 700 ksp->data = (void*)pipefgmres; 701 ksp->ops->buildsolution = KSPBuildSolution_PIPEFGMRES; 702 ksp->ops->setup = KSPSetUp_PIPEFGMRES; 703 ksp->ops->solve = KSPSolve_PIPEFGMRES; 704 ksp->ops->reset = KSPReset_PIPEFGMRES; 705 ksp->ops->destroy = KSPDestroy_PIPEFGMRES; 706 ksp->ops->view = KSPView_PIPEFGMRES; 707 ksp->ops->setfromoptions = KSPSetFromOptions_PIPEFGMRES; 708 ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES; 709 ksp->ops->computeeigenvalues = KSPComputeEigenvalues_GMRES; 710 711 ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);CHKERRQ(ierr); 712 713 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);CHKERRQ(ierr); 714 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);CHKERRQ(ierr); 715 ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);CHKERRQ(ierr); 716 717 pipefgmres->nextra_vecs = 1; 718 pipefgmres->haptol = 1.0e-30; 719 pipefgmres->q_preallocate = 0; 720 pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS; 721 pipefgmres->orthog = 0; 722 pipefgmres->nrs = 0; 723 pipefgmres->sol_temp = 0; 724 pipefgmres->max_k = PIPEFGMRES_DEFAULT_MAXK; 725 pipefgmres->Rsvd = 0; 726 pipefgmres->orthogwork = 0; 727 pipefgmres->cgstype = KSP_GMRES_CGS_REFINE_NEVER; 728 pipefgmres->shift = 1.0; 729 PetscFunctionReturn(0); 730 } 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "KSPPIPEFGMRESGetNewVectors" 734 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it) 735 { 736 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 737 PetscInt nwork = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */ 738 PetscInt nalloc; /* number to allocate */ 739 PetscErrorCode ierr; 740 PetscInt k; 741 742 PetscFunctionBegin; 743 nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate 744 in a single chunk */ 745 746 /* Adjust the number to allocate to make sure that we don't exceed the 747 number of available slots (pipefgmres->vecs_allocated)*/ 748 if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) { 749 nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; 750 } 751 if (!nalloc) PetscFunctionReturn(0); 752 753 pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */ 754 755 /* work vectors */ 756 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);CHKERRQ(ierr); 757 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);CHKERRQ(ierr); 758 for (k=0; k < nalloc; k++) { 759 pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k]; 760 } 761 /* specify size of chunk allocated */ 762 pipefgmres->mwork_alloc[nwork] = nalloc; 763 764 /* preconditioned vectors (note we don't use VEC_OFFSET) */ 765 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);CHKERRQ(ierr); 766 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);CHKERRQ(ierr); 767 for (k=0; k < nalloc; k++) { 768 pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k]; 769 } 770 771 ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);CHKERRQ(ierr); 772 ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);CHKERRQ(ierr); 773 for (k=0; k < nalloc; k++) { 774 pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k]; 775 } 776 777 /* increment the number of work vector chunks */ 778 pipefgmres->nwork_alloc++; 779 PetscFunctionReturn(0); 780 } 781 #undef __FUNCT__ 782 #define __FUNCT__ "KSPPIPEFGMRESSetShift" 783 /*@ 784 KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver. 785 786 A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there). 787 788 Logically Collective on KSP 789 790 Input Parameters: 791 + ksp - the Krylov space context 792 - shift - the shift 793 794 Level: intermediate 795 796 Options Database: 797 . -ksp_pipefgmres_shift <shift> 798 799 .seealso: KSPComputeEigenvalues() 800 @*/ 801 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift) 802 { 803 KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data; 804 805 PetscFunctionBegin; 806 PetscValidHeaderSpecific(ksp,KSP_CLASSID,1); 807 PetscValidLogicalCollectiveScalar(ksp,shift,2); 808 pipefgmres->shift = shift; 809 PetscFunctionReturn(0); 810 } 811