1 subroutine SolGMRe (y, ac, yold, acold, 2 & x, iBC, BC, EGmass, 3 & res, BDiag, HBrg, eBrg, 4 & yBrg, Rcos, Rsin, iper, 5 & ilwork, shp, shgl, shpb, 6 & shglb, Dy, rerr) 7c 8c---------------------------------------------------------------------- 9c 10c This is the preconditioned GMRES driver routine. 11c 12c input: 13c y (nshg,ndof) : Y-variables at n+alpha_v 14c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 15c yold (nshg,ndof) : Y-variables at beginning of step 16c acold (nshg,ndof) : Primvar. accel. variable at begng step 17c x (numnp,nsd) : node coordinates 18c iBC (nshg) : BC codes 19c BC (nshg,ndofBC) : BC constraint parameters 20c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 21c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 22c yBrg (Kspace) : solution of Hessenberg minim. problem 23c Rcos (Kspace) : Rotational cosine of QR algorithm 24c Rsin (Kspace) : Rotational sine of QR algorithm 25c shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 26c shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 27c 28c output: 29c res (nshg,nflow) : preconditioned residual 30c BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 31c 32c 33c Zdenek Johan, Winter 1991. (Fortran 90) 34c---------------------------------------------------------------------- 35c 36 use pointer_data 37 38 include "common.h" 39 include "mpif.h" 40 include "auxmpi.h" 41c 42 dimension y(nshg,ndof), ac(nshg,ndof), 43 & yold(nshg,ndof), acold(nshg,ndof), 44 & x(numnp,nsd), 45 & iBC(nshg), BC(nshg,ndofBC), 46 & res(nshg,nflow), 47 & BDiag(nshg,nflow,nflow), 48 & HBrg(Kspace+1,*), eBrg(*), 49 & yBrg(*), Rcos(*), 50 & Rsin(*), ilwork(nlwork), 51 & iper(nshg), EGmass(numel,nedof,nedof)!, 52ctoomuchmem & Binv(numel,nedof,nedof) 53c 54 dimension Dy(nshg,nflow), rmes(nshg,nflow), 55 & temp(nshg,nflow), 56 & uBrg(nshg,nflow,Kspace+1) 57c 58 dimension shp(MAXTOP,maxsh,MAXQPT), 59 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 60 & shpb(MAXTOP,maxsh,MAXQPT), 61 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 62 real*8 rerr(nshg,10) 63c 64c.... *******************>> Element Data Formation <<****************** 65c 66c 67c.... set the parameters for flux and surface tension calculations 68c 69c 70 idflx = 0 71 if(idiff >= 1) idflx= idflx + (nflow-1) * nsd 72 if (isurf == 1) idflx=idflx + nsd 73c 74c.... form the LHS matrices, the residual vector, and the block 75c diagonal preconditioner 76c 77 call ElmGMRe(y, ac, x, 78 & shp, shgl, iBC, 79 & BC, shpb, 80 & shglb, res, 81 & rmes, BDiag, iper, 82 & ilwork, EGmass, rerr ) 83c 84c.... **********************>> EBE - GMRES <<******************** 85c 86 call timer ('Solver ') 87c 88c.... ------------------------> Initialization <----------------------- 89c 90c 91c.... LU decompose the block diagonals 92c 93 if (iprec .ne. 0) 94 & call i3LU (BDiag, res, 'LU_Fact ') 95 96c 97c.... block diagonal precondition residual 98c 99 call i3LU (BDiag, res, 'forward ') 100c 101c.... initialize Dy 102c 103 Dy = zero 104c 105c.... Pre-precondition the LHS mass matrix and set up the element 106c by element preconditioners 107c 108ctoomuchmemory note that Binv is demoted from huge array to just one 109c real*8 in i3pre because it takes too much memory 110 111 call i3pre (BDiag, Binv, EGmass, ilwork) 112c 113c.... left EBE precondition the residual 114c 115ctoomuchmem call i3Pcond (Binv, res, ilwork, 'L_Pcond ') 116c 117c.... copy res in uBrg(1) 118c 119 uBrg(:,:,1) = res 120c 121c.... calculate norm of residual 122c 123 temp = res**2 124 125 call sumgat (temp, nflow, summed, ilwork) 126 unorm = sqrt(summed) 127c 128c.... check if GMRES iterations are required 129c 130 iKs = 0 131 lGMRES = 0 132c 133c.... if we are down to machine precision, don't bother solving 134c 135 if (unorm .lt. 100.*epsM**2) goto 3000 136c 137c.... set up tolerance of the Hessenberg's problem 138c 139 epsnrm = etol * unorm 140c 141c.... ------------------------> GMRES Loop <------------------------- 142c 143c.... loop through GMRES cycles 144c 145 do 2000 mGMRES = 1, nGMRES 146 lGMRES = mGMRES - 1 147c 148 if (lGMRES .gt. 0) then 149c 150c.... if GMRES restarts are necessary, calculate R - A x 151c 152c 153c.... right precondition Dy 154c 155 temp = Dy 156 157ctoomuchmem call i3Pcond (Binv, temp, ilwork, 'R_Pcond ') 158c 159c.... perform the A x product 160c 161 call Au1GMR (EGmass, temp, ilwork, iBC,iper) 162c 163c.... periodic nodes have to assemble results to their partners 164c 165 call bc3per (iBC, temp, iper, ilwork, nflow) 166c 167c.... left preconditioning 168c 169ctoomuchmem call i3Pcond (Binv, temp, ilwork, 'L_Pcond ') 170c 171c.... subtract A x from residual and calculate the norm 172c 173 temp = res - temp 174 uBrg(:,:,1) = temp 175c 176c.... calculate the norm 177c 178 temp = temp**2 179 call sumgat (temp, nflow, summed, ilwork) 180 unorm = sqrt(summed) 181c 182c.... flop count 183c 184 ! flops = flops + nflow*nshg+nshg 185c 186 endif 187c 188c.... set up RHS of the Hessenberg's problem 189c 190 call clear (eBrg, Kspace+1) 191 eBrg(1) = unorm 192c 193c.... normalize the first Krylov vector 194c 195 uBrg(:,:,1) = uBrg(:,:,1) / unorm 196c 197c.... loop through GMRES iterations 198c 199 do 1000 iK = 1, Kspace 200 iKs = iK 201 202 uBrg(:,:,iKs+1) = uBrg(:,:,iKs) 203c 204c.... right EBE precondition the LHS ( u_{i+1} <-- inverse(U) u_i ) 205c 206ctoomuchmem call i3Pcond (Binv, uBrg(:,:,iKs+1), ilwork, 'R_Pcond ') 207c 208c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 209c 210 call Au1GMR ( EGmass, uBrg(:,:,iKs+1), ilwork, iBC,iper) 211c 212c.... periodic nodes have to assemble results to their partners 213c 214 call bc3per (iBC, uBrg(:,:,iKs+1), iper, ilwork, nflow) 215 216c 217c.... left EBE precondition the LHS ( u_{i+1} <-- inverse(L) u_{i+1} ) 218c 219ctoomuchmem call i3Pcond (Binv, uBrg(:,:,iKs+1), ilwork, 'L_Pcond ') 220c 221c.... orthogonalize and get the norm 222c 223 do jK = 1, iKs+1 224c 225 if (jK .eq. 1) then 226c 227 temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector 228 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1) 229c 230 else 231c 232c project off jK-1 vector 233c 234 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) - beta * uBrg(:,:,jK-1) 235c 236 temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector 237 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j) 238c 239 endif 240c 241 HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix 242c 243 enddo 244c 245 ! flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp 246c 247c the last inner product was with what was left of the vector (after 248c projecting off all of the previous vectors 249c 250 unorm = sqrt(beta) 251 HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band 252c 253c.... normalize the Krylov vector 254c 255 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov 256c vector 257c 258c.... construct and reduce the Hessenberg Matrix 259c since there is only one subdiagonal we can use a Givens rotation to 260c rotate off each subdiagonal AS IT IS FORMED. We do this because it 261c allows us to check progress of solution and quit when satisfied. Note 262c that all future K vects will put a subdiagonal in the next column so 263c there is no penalty to work ahead as the rotation for the next vector 264c will be unaffected by this rotation. 265 266c 267c H Y = E ========> R_i H Y = R_i E 268c 269 do jK = 1, iKs-1 270 tmp = Rcos(jK) * HBrg(jK, iKs) + 271 & Rsin(jK) * HBrg(jK+1,iKs) 272 HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK, iKs) + 273 & Rcos(jK) * HBrg(jK+1,iKs) 274 HBrg(jK, iKs) = tmp 275 enddo 276c 277 tmp = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2) 278 Rcos(iKs) = HBrg(iKs, iKs) / tmp 279 Rsin(iKs) = HBrg(iKs+1,iKs) / tmp 280 HBrg(iKs, iKs) = tmp 281 HBrg(iKs+1,iKs) = zero 282c 283c.... rotate eBrg R_i E 284c 285 tmp = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1) 286 eBrg(iKs+1) =-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1) 287 eBrg(iKs) = tmp 288c 289c.... check for convergence 290c 291 ntotGM = ntotGM + 1 292 echeck=abs(eBrg(iKs+1)) 293 if (echeck .le. epsnrm) exit 294c 295c.... end of GMRES iteration loop 296c 297 1000 continue 298c 299c.... -------------------------> Solution <------------------------ 300c 301c.... if converged or end of Krylov space 302c 303c.... solve for yBrg 304c 305 do jK = iKs, 1, -1 306 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 307 do lK = 1, jK-1 308 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 309 enddo 310 enddo 311c 312c.... update Dy 313c 314 do jK = 1, iKs 315 Dy = Dy + yBrg(jK) * uBrg(:,:,jK) 316 enddo 317c 318c.... flop count 319c 320 ! flops = flops + (3*iKs+1)*nflow*nshg 321c 322c.... check for convergence 323c 324 325 echeck=abs(eBrg(iKs+1)) 326 if (echeck .le. epsnrm) exit 327 if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction', 328 & (one-echeck/unorm)/(one-etol)*100 329c 330c.... end of mGMRES loop 331c 332 2000 continue 333c 334c.... ------------------------> Converged <------------------------ 335c 336c.... if converged 337c 338 3000 continue 339c 340c.... back EBE precondition the results 341c 342ctoomuchmem call i3Pcond (Binv, Dy, ilwork, 'R_Pcond ') 343c 344c.... back block-diagonal precondition the results 345c 346 call i3LU (BDiag, Dy, 'backward') 347c 348c 349c.... output the statistics 350c 351 call rstat (res, ilwork) 352c 353c.... stop the timer 354c 355 3002 continue ! no solve just res. 356 call timer ('Back ') 357c 358c.... end 359c 360 return 361 end 362 363 364 365 366 367 subroutine SolGMRs(y, ac, yold, acold, 368 & x, iBC, BC, 369 & col, row, lhsk, 370 & res, BDiag, HBrg, eBrg, 371 & yBrg, Rcos, Rsin, iper, 372 & ilwork, shp, shgl, shpb, 373 & shglb, Dy, rerr) 374c 375c---------------------------------------------------------------------- 376c 377c This is the preconditioned GMRES driver routine. 378c 379c input: 380c y (nshg,ndof) : Y-variables at n+alpha_v 381c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 382c yold (nshg,ndof) : Y-variables at beginning of step 383c acold (nshg,ndof) : Primvar. accel. variable at begng step 384c x (numnp,nsd) : node coordinates 385c iBC (nshg) : BC codes 386c BC (nshg,ndofBC) : BC constraint parameters 387c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 388c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 389c yBrg (Kspace) : solution of Hessenberg minim. problem 390c Rcos (Kspace) : Rotational cosine of QR algorithm 391c Rsin (Kspace) : Rotational sine of QR algorithm 392c shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 393c shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 394c 395c output: 396c res (nshg,nflow) : preconditioned residual 397c BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 398c 399c 400c Zdenek Johan, Winter 1991. (Fortran 90) 401c---------------------------------------------------------------------- 402c 403 use pointer_data 404 405 include "common.h" 406 include "mpif.h" 407 include "auxmpi.h" 408c 409 integer col(nshg+1), row(nnz*nshg) 410 real*8 lhsK(nflow*nflow,nnz_tot) 411 412 413 dimension y(nshg,ndof), ac(nshg,ndof), 414 & yold(nshg,ndof), acold(nshg,ndof), 415 & x(numnp,nsd), 416 & iBC(nshg), BC(nshg,ndofBC), 417 & res(nshg,nflow), 418 & BDiag(nshg,nflow,nflow), 419 & HBrg(Kspace+1,Kspace), eBrg(Kspace+1), 420 & yBrg(Kspace), Rcos(Kspace), 421 & Rsin(Kspace), ilwork(nlwork), 422 & iper(nshg) 423c 424 dimension Dy(nshg,nflow), rmes(nshg,nflow), 425 & temp(nshg,nflow), 426 & uBrg(nshg,nflow,Kspace+1) 427c 428 dimension shp(MAXTOP,maxsh,MAXQPT), 429 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 430 & shpb(MAXTOP,maxsh,MAXQPT), 431 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 432 real*8 rerr(nshg,10) 433c 434c 435c.... *******************>> Element Data Formation <<****************** 436c 437c 438c.... set the parameters for flux and surface tension calculations 439c 440c 441 idflx = 0 442 if(idiff >= 1) idflx= idflx + (nflow-1) * nsd 443 if (isurf == 1) idflx=idflx + nsd 444c 445c.... form the LHS matrices, the residual vector, and the block 446c diagonal preconditioner 447c 448 call ElmGMRs(y, ac, x, 449 & shp, shgl, iBC, 450 & BC, shpb, 451 & shglb, res, 452 & rmes, BDiag, iper, 453 & ilwork, lhsK, col, 454 & row, rerr ) 455 call rstat (res, ilwork) 456 if(ntotGM.eq.0) resfrt=zero !don't let this mess up scaled dB 457 if(myrank.eq.master) then 458 write(*,*)'residual prior to sbd-preconditioning' 459 endif 460c 461 462 call tnanq(res,5, 'res_egmr') 463 call tnanq(BDiag,25, 'bdg_egmr') 464c 465c.... **********************>> EBE - GMRES <<******************** 466c 467 call timer ('Solver ') 468c 469c.... ------------------------> Initialization <----------------------- 470c 471c 472c.... LU decompose the block diagonals 473c 474 if (iprec .ne. 0) then 475 call i3LU (BDiag, res, 'LU_Fact ') 476 if (numpe > 1) then 477 call commu (BDiag , ilwork, nflow*nflow , 'out') 478 endif 479 endif 480c 481c.... block diagonal precondition residual 482c 483 call i3LU (BDiag, res, 'forward ') 484c 485c Check the residual for divering trend 486c 487 call rstatCheck(res,ilwork,y,ac) 488c 489c.... initialize Dy 490c 491 Dy = zero 492c 493c.... Pre-precondition the LHS mass matrix and set up the sparse 494c preconditioners 495c 496 497 if(lhs.eq.1) call Spsi3pre (BDiag, lhsK, col, row) 498c 499c.... copy res in uBrg(1) 500c 501 uBrg(:,:,1) = res 502c 503c.... calculate norm of residual 504c 505 temp = res**2 506 507 call sumgat (temp, nflow, summed, ilwork) 508 unorm = sqrt(summed) 509c 510c.... check if GMRES iterations are required 511c 512 iKs = 0 513 lGMRESs = 0 514c 515c.... if we are down to machine precision, don't bother solving 516c 517 if (unorm .lt. 100.*epsM**2) goto 3000 518c 519c.... set up tolerance of the Hessenberg's problem 520c 521 epsnrm = etol * unorm 522c 523c.... ------------------------> GMRES Loop <------------------------- 524c 525c.... loop through GMRES cycles 526c 527 do 2000 mGMRES = 1, nGMRES 528 lGMRESs = mGMRES - 1 529c 530 if (lGMRES .gt. 0) then 531c 532c.... if GMRES restarts are necessary, calculate R - A x 533c 534c 535c.... right precondition Dy 536c 537 temp = Dy 538 539c 540c.... perform the A x product 541c 542 call SparseAp (iper,ilwork,iBC, col, row, lhsK, temp) 543c call tnanq(temp,5, 'q_spAPrs') 544 545c 546c.... periodic nodes have to assemble results to their partners 547c 548 call bc3per (iBC, temp, iper, ilwork, nflow) 549c call tnanq(temp,5, 'q_BCprs') 550c 551c.... subtract A x from residual and calculate the norm 552c 553 temp = res - temp 554 uBrg(:,:,1) = temp 555c 556c.... calculate the norm 557c 558 temp = temp**2 559 call sumgat (temp, nflow, summed, ilwork) 560 unorm = sqrt(summed) 561c 562c.... flop count 563c 564 ! flops = flops + nflow*nshg+nshg 565c 566 endif 567c 568c.... set up RHS of the Hessenberg's problem 569c 570 call clear (eBrg, Kspace+1) 571 eBrg(1) = unorm 572c 573c.... normalize the first Krylov vector 574c 575 uBrg(:,:,1) = uBrg(:,:,1) / unorm 576c 577c.... loop through GMRES iterations 578c 579 do 1000 iK = 1, Kspace 580 iKs = iK 581 582 uBrg(:,:,iKs+1) = uBrg(:,:,iKs) 583c 584c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 585c 586 call SparseAp (iper, ilwork, iBC, 587 & col, row, lhsK, 588 & uBrg(:,:,iKs+1) ) 589c call tnanq(uBrg(:,:,iKS+1),5, 'q_spAP') 590 591c 592c.... periodic nodes have to assemble results to their partners 593c 594 call bc3per (iBC, uBrg(:,:,iKs+1), iper, ilwork, nflow) 595c call tnanq(uBrg(:,:,iKS+1),5, 'q_bc') 596 597c 598c.... orthogonalize and get the norm 599c 600 do jK = 1, iKs+1 601c 602 if (jK .eq. 1) then 603c 604 temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector 605 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1) 606c 607 else 608c 609c project off jK-1 vector 610c 611 uBrg(:,:,iKs+1)=uBrg(:,:,iKs+1)-beta * uBrg(:,:,jK-1) 612c 613 temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector 614 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j) 615c 616 endif 617c 618 HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix 619c 620 enddo 621c 622 ! flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp 623c 624c the last inner product was with what was left of the vector (after 625c projecting off all of the previous vectors 626c 627 if(beta.le.0) write(*,*) 'beta in solgmr non-positive' 628 unorm = sqrt(beta) 629 HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band 630c 631c.... normalize the Krylov vector 632c 633 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov 634c vector 635c 636c.... construct and reduce the Hessenberg Matrix 637c since there is only one subdiagonal we can use a Givens rotation to 638c rotate off each subdiagonal AS IT IS FORMED. We do this because it 639c allows us to check progress of solution and quit when satisfied. Note 640c that all future K vects will put a subdiagonal in the next column so 641c there is no penalty to work ahead as the rotation for the next vector 642c will be unaffected by this rotation. 643 644c 645c H Y = E ========> R_i H Y = R_i E 646c 647 do jK = 1, iKs-1 648 tmp = Rcos(jK) * HBrg(jK, iKs) + 649 & Rsin(jK) * HBrg(jK+1,iKs) 650 HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK, iKs) + 651 & Rcos(jK) * HBrg(jK+1,iKs) 652 HBrg(jK, iKs) = tmp 653 enddo 654c 655 tmp = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2) 656 Rcos(iKs) = HBrg(iKs, iKs) / tmp 657 Rsin(iKs) = HBrg(iKs+1,iKs) / tmp 658 HBrg(iKs, iKs)= tmp 659 HBrg(iKs+1,iKs)= zero 660c 661c.... rotate eBrg R_i E 662c 663 tmp = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1) 664 eBrg(iKs+1)=-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1) 665 eBrg(iKs) = tmp 666c 667c.... check for convergence 668c 669 ntotGM = ntotGM + 1 670 echeck=abs(eBrg(iKs+1)) 671 if (echeck .le. epsnrm.and. iKs .ge. minIters) exit 672c 673c.... end of GMRES iteration loop 674c 675 1000 continue 676c 677c.... -------------------------> Solution <------------------------ 678c 679c.... if converged or end of Krylov space 680c 681c.... solve for yBrg 682c 683 do jK = iKs, 1, -1 684 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 685 do lK = 1, jK-1 686 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 687 enddo 688 enddo 689c 690c.... update Dy 691c 692 do jK = 1, iKs 693 Dy = Dy + yBrg(jK) * uBrg(:,:,jK) 694 enddo 695c 696c.... flop count 697c 698 ! flops = flops + (3*iKs+1)*nflow*nshg 699c 700c.... check for convergence 701c 702 echeck=abs(eBrg(iKs+1)) 703 if (echeck .le. epsnrm) exit 704! if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction', 705! & (one-echeck*etol/epsnrm)/(one-etol)*100 706 707c 708c.... end of mGMRES loop 709c 710 2000 continue 711c 712c.... ------------------------> Converged <------------------------ 713c 714c.... if converged 715c 716 3000 continue 717 718c 719c 720c.... back block-diagonal precondition the results 721c 722 call i3LU (BDiag, Dy, 'backward') 723c 724c 725c.... output the statistics 726c 727 call rstat (res, ilwork) 728 729 if(myrank.eq.master) then 730 if (echeck .le. epsnrm) then 731 write(*,*) 732 else 733 write(*,*)'solver tolerance %satisfaction', 734 & (one-echeck*etol/epsnrm)/(one-etol)*100 735 endif 736 endif 737c 738c.... stop the timer 739c 740 3002 continue ! no solve just res. 741 call timer ('Back ') 742c 743c.... end 744c 745 return 746 end 747 748 subroutine SolGMRSclr(y, ac, yold, 749 & acold, EGmasst, 750 & x, elDw, 751 & iBC, BC, 752 & rest, HBrg, eBrg, 753 & yBrg, Rcos, Rsin, iper, 754 & ilwork, 755 & shp, shgl, 756 & shpb, shglb, Dyt) 757c 758c---------------------------------------------------------------------- 759c 760c This is the preconditioned GMRES driver routine. 761c 762c input: 763c y (nshg,ndof) : Y-variables at n+alpha_f 764c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 765c yold (nshg,ndof) : Y-variables at beginning of step 766c acold (nshg,ndof) : Primvar. accel. variable at begng step 767c x (numnp,nsd) : node coordinates 768c iBC (nshg) : BC codes 769c BC (nshg,ndofBC) : BC constraint parameters 770c iper (nshg) : periodic nodal information 771c 772c output: 773c y (nshg,ndof) : Y-variables at n+alpha_f 774c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 775c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 776c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 777c yBrg (Kspace) : solution of Hessenberg minim. problem 778c Rcos (Kspace) : Rotational cosine of QR algorithm 779c Rsin (Kspace) : Rotational sine of QR algorithm 780c output: 781c rest (numnp) : preconditioned residual 782c 783c 784c Zdenek Johan, Winter 1991. (Fortran 90) 785c---------------------------------------------------------------------- 786c 787 use pointer_data 788 789 include "common.h" 790 include "mpif.h" 791 include "auxmpi.h" 792c 793 dimension y(nshg,ndof), ac(nshg,ndof), 794 & yold(nshg,ndof), acold(nshg,ndof), 795 & x(numnp,nsd), 796 & iBC(nshg), BC(nshg,ndofBC), 797 & rest(nshg), 798 & Diag(nshg), 799 & HBrg(Kspace+1,*), eBrg(*), 800 & yBrg(*), Rcos(*), 801 & Rsin(*), ilwork(nlwork), 802 & iper(nshg), EGmasst(numel,nshape,nshape) 803c 804 dimension Dyt(nshg), rmest(nshg), 805 & tempt(nshg), Dinv(nshg), 806 & uBrgt(nshg,Kspace+1) 807c 808 dimension shp(MAXTOP,maxsh,MAXQPT), 809 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 810 & shpb(MAXTOP,maxsh,MAXQPT), 811 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 812 real*8 elDw(numel) 813c 814c.... *******************>> Element Data Formation <<****************** 815c 816c 817c.... form the LHS matrices, the residual vector, and the block 818c diagonal preconditioner 819c 820 call ElmGMRSclr(y, ac, 821 & x, elDw, shp, shgl, 822 & iBC, BC, 823 & shpb, shglb, 824 & rest, 825 & rmest, Diag, iper, 826 & ilwork, EGmasst) 827c 828c.... **********************>> EBE - GMRES <<******************** 829c 830 call timer ('Solver ') 831c 832c.... ------------------------> Initialization <----------------------- 833c 834c 835 id = isclr+5 836c.... initialize Dy 837c 838 Dyt = zero 839c 840c.... Right preconditioner 841c 842 call i3preSclr(Diag, Dinv, EGmassT, ilwork) 843c 844c Check the residual for divering trend 845c 846 847 call rstatCheckSclr(rest,ilwork,y,ac) 848 849c 850c.... copy rest in uBrgt(1) 851c 852 uBrgt(:,1) = rest 853c 854c.... calculate norm of residual 855c 856 tempt = rest**2 857 858 call sumgat (tempt, 1, summed, ilwork) 859 unorm = sqrt(summed) 860c 861c.... check if GMRES iterations are required 862c 863 iKss = 0 864 lGMRESt = 0 865c 866c.... if we are down to machine precision, don't bother solving 867c 868 if (unorm .lt. 100.*epsM**2) goto 3000 869c 870c.... set up tolerance of the Hessenberg's problem 871c 872 epsnrm = etol * unorm 873c 874c.... ------------------------> GMRES Loop <------------------------- 875c 876c.... loop through GMRES cycles 877c 878 do 2000 mGMRES = 1, nGMRES 879 lGMRESt = mGMRES - 1 880c 881 if (lGMRESt .gt. 0) then 882c 883c.... if GMRES restarts are necessary, calculate R - A x 884c 885c 886 887c.... perform the A x product 888c 889 call Au1GMRSclr (EGmasst, tempt, ilwork, iper) 890c 891c.... periodic nodes have to assemble results to their partners 892c 893c call bc3perSclr (iBC, tempt, iper) 894c 895c.... subtract A x from residual and calculate the norm 896c 897 tempt = rest - tempt 898 uBrgt(:,1) = tempt 899c 900c.... calculate the norm 901c 902 tempt = tempt**2 903 call sumgat (tempt, 1, summed, ilwork) 904 unorm = sqrt(summed) 905c 906c.... flop count 907c 908 ! flops = flops + ndof*numnp+numnp 909c 910 endif 911c 912c.... set up RHS of the Hessenberg's problem 913c 914 call clear (eBrg, Kspace+1) 915 call clear (HBrg, Kspace+1) 916 eBrg(1) = unorm 917c 918c.... normalize the first Krylov vector 919c 920 uBrgt(:,1) = uBrgt(:,1) / unorm 921c 922c.... loop through GMRES iterations 923c 924 do 1000 iK = 1, Kspace 925 iKss = iK 926 927 uBrgt(:,iKss+1) = uBrgt(:,iKss) 928 929c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 930c 931 call Au1GMRSclr ( EGmasst, uBrgt(:,iKss+1), ilwork, iper ) 932 933c 934c.... periodic nodes have to assemble results to their partners 935c 936 call bc3perSclr (iBC, uBrgt(:,iKss+1), iper) 937c 938c.... orthogonalize and get the norm 939c 940 do jK = 1, iKss+1 941c 942 if (jK .eq. 1) then 943c 944 tempt = uBrgt(:,iKss+1) * uBrgt(:,1) ! {u_{i+1}*u_1} vector 945 call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_1) 946c 947 else 948c 949c project off jK-1 vector 950c 951 uBrgt(:,iKss+1) = uBrgt(:,iKss+1) - beta * uBrgt(:,jK-1) 952c 953 tempt = uBrgt(:,iKss+1) * uBrgt(:,jK) !{u_{i+1}*u_j} vector 954 call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_j) 955c 956 endif 957c 958 HBrg(jK,iKss) = beta ! put this in the Hessenberg Matrix 959c 960 enddo 961c 962 ! flops = flops + (3*iKss+1)*ndof*numnp+(iKss+1)*numnp 963c 964c the last inner product was with what was left of the vector (after 965c projecting off all of the previous vectors 966c 967 unorm = sqrt(beta) 968 HBrg(iKss+1,iKss) = unorm ! this fills the 1 sub diagonal band 969c 970c.... normalize the Krylov vector 971c 972 uBrgt(:,iKss+1) = uBrgt(:,iKss+1) / unorm ! normalize the next Krylov 973c vector 974c 975c.... construct and reduce the Hessenberg Matrix 976c since there is only one subdiagonal we can use a Givens rotation to 977c rotate off each subdiagonal AS IT IS FORMED. We do this because it 978c allows us to check progress of solution and quit when satisfied. Note 979c that all future K vects will put a subdiagonal in the next column so 980c there is no penalty to work ahead as the rotation for the next vector 981c will be unaffected by this rotation. 982 983c 984c H Y = E ========> R_i H Y = R_i E 985c 986 do jK = 1, iKss-1 987 tmp = Rcos(jK) * HBrg(jK, iKss) + 988 & Rsin(jK) * HBrg(jK+1,iKss) 989 HBrg(jK+1,iKss) = -Rsin(jK) * HBrg(jK, iKss) + 990 & Rcos(jK) * HBrg(jK+1,iKss) 991 HBrg(jK, iKss) = tmp 992 enddo 993c 994 tmp = sqrt(HBrg(iKss,iKss)**2 + HBrg(iKss+1,iKss)**2) 995 Rcos(iKss) = HBrg(iKss, iKss) / tmp 996 Rsin(iKss) = HBrg(iKss+1,iKss) / tmp 997 HBrg(iKss, iKss) = tmp 998 HBrg(iKss+1,iKss) = zero 999c 1000c.... rotate eBrg R_i E 1001c 1002 tmp = Rcos(iKss)*eBrg(iKss) + Rsin(iKss)*eBrg(iKss+1) 1003 eBrg(iKss+1)=-Rsin(iKss)*eBrg(iKss) + Rcos(iKss)*eBrg(iKss+1) 1004 eBrg(iKss) = tmp 1005c 1006c.... check for convergence 1007c 1008 ercheck=eBrg(iKss+1) 1009 ntotGMs = ntotGMs + 1 1010 if (abs(eBrg(iKss+1)) .le. epsnrm) exit 1011c 1012c.... end of GMRES iteration loop 1013c 1014 1000 continue 1015c 1016c.... -------------------------> Solution <------------------------ 1017c 1018c.... if converged or end of Krylov space 1019c 1020c.... solve for yBrg 1021c 1022 do jK = iKss, 1, -1 1023 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 1024 do lK = 1, jK-1 1025 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 1026 enddo 1027 enddo 1028c 1029c.... update Dy 1030c 1031 do jK = 1, iKss 1032 Dyt = Dyt + yBrg(jK) * uBrgt(:,jK) 1033 enddo 1034c 1035c.... flop count 1036c 1037 ! flops = flops + (3*iKss+1)*ndof*numnp 1038c 1039c.... check for convergence 1040c 1041 if (abs(eBrg(iKss+1)) .le. epsnrm) exit 1042c 1043c.... end of mGMRES loop 1044c 1045 2000 continue 1046c 1047c.... ------------------------> Converged <------------------------ 1048c 1049c.... if converged 1050c 1051 3000 continue 1052c 1053c.... back precondition the result 1054c 1055 Dyt(:) = Dyt(:) * Dinv(:) 1056c 1057c.... output the statistics 1058c 1059 call rstatSclr(rest, ilwork) 1060c.... stop the timer 1061c 1062 3002 continue ! no solve just res. 1063 call timer ('Back ') 1064c 1065c.... end 1066c 1067 return 1068 end 1069 1070 1071