1 subroutine SolGMRe (y, ac, yold, acold, 2 & x, iBC, BC, EGmass, 3 & res, BDiag, HBrg, eBrg, 4 & yBrg, Rcos, Rsin, iper, 5 & ilwork, shp, shgl, shpb, 6 & shglb, Dy, rerr) 7c 8c---------------------------------------------------------------------- 9c 10c This is the preconditioned GMRES driver routine. 11c 12c input: 13c y (nshg,ndof) : Y-variables at n+alpha_v 14c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 15c yold (nshg,ndof) : Y-variables at beginning of step 16c acold (nshg,ndof) : Primvar. accel. variable at begng step 17c x (numnp,nsd) : node coordinates 18c iBC (nshg) : BC codes 19c BC (nshg,ndofBC) : BC constraint parameters 20c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 21c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 22c yBrg (Kspace) : solution of Hessenberg minim. problem 23c Rcos (Kspace) : Rotational cosine of QR algorithm 24c Rsin (Kspace) : Rotational sine of QR algorithm 25c shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 26c shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 27c 28c output: 29c res (nshg,nflow) : preconditioned residual 30c BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 31c 32c 33c Zdenek Johan, Winter 1991. (Fortran 90) 34c---------------------------------------------------------------------- 35c 36 use pointer_data 37 38 include "common.h" 39 include "mpif.h" 40 include "auxmpi.h" 41c 42 dimension y(nshg,ndof), ac(nshg,ndof), 43 & yold(nshg,ndof), acold(nshg,ndof), 44 & x(numnp,nsd), 45 & iBC(nshg), BC(nshg,ndofBC), 46 & res(nshg,nflow), 47 & BDiag(nshg,nflow,nflow), 48 & HBrg(Kspace+1,*), eBrg(*), 49 & yBrg(*), Rcos(*), 50 & Rsin(*), ilwork(nlwork), 51 & iper(nshg), EGmass(numel,nedof,nedof)!, 52ctoomuchmem & Binv(numel,nedof,nedof) 53c 54 dimension Dy(nshg,nflow), rmes(nshg,nflow), 55 & temp(nshg,nflow), 56 & uBrg(nshg,nflow,Kspace+1) 57c 58 dimension shp(MAXTOP,maxsh,MAXQPT), 59 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 60 & shpb(MAXTOP,maxsh,MAXQPT), 61 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 62 real*8 rerr(nshg,10) 63c 64c.... *******************>> Element Data Formation <<****************** 65c 66c 67c.... set the parameters for flux and surface tension calculations 68c 69c 70 idflx = 0 71 if(idiff >= 1) idflx= idflx + (nflow-1) * nsd 72 if (isurf == 1) idflx=idflx + nsd 73c 74c.... form the LHS matrices, the residual vector, and the block 75c diagonal preconditioner 76c 77 call ElmGMRe(y, ac, x, 78 & shp, shgl, iBC, 79 & BC, shpb, 80 & shglb, res, 81 & rmes, BDiag, iper, 82 & ilwork, EGmass, rerr ) 83c 84c.... **********************>> EBE - GMRES <<******************** 85c 86 call timer ('Solver ') 87c 88c.... ------------------------> Initialization <----------------------- 89c 90c 91c.... LU decompose the block diagonals 92c 93 if (iprec .ne. 0) 94 & call i3LU (BDiag, res, 'LU_Fact ') 95 96c 97c.... block diagonal precondition residual 98c 99 call i3LU (BDiag, res, 'forward ') 100c 101c.... initialize Dy 102c 103 Dy = zero 104c 105c.... Pre-precondition the LHS mass matrix and set up the element 106c by element preconditioners 107c 108ctoomuchmemory note that Binv is demoted from huge array to just one 109c real*8 in i3pre because it takes too much memory 110 111 call i3pre (BDiag, Binv, EGmass, ilwork) 112c 113c.... left EBE precondition the residual 114c 115ctoomuchmem call i3Pcond (Binv, res, ilwork, 'L_Pcond ') 116c 117c.... copy res in uBrg(1) 118c 119 uBrg(:,:,1) = res 120c 121c.... calculate norm of residual 122c 123 temp = res**2 124 125 call sumgat (temp, nflow, summed, ilwork) 126 unorm = sqrt(summed) 127c 128c.... check if GMRES iterations are required 129c 130 iKs = 0 131 lGMRES = 0 132c 133c.... if we are down to machine precision, don't bother solving 134c 135 if (unorm .lt. 100.*epsM**2) goto 3000 136c 137c.... set up tolerance of the Hessenberg's problem 138c 139 epsnrm = etol * unorm 140c 141c.... ------------------------> GMRES Loop <------------------------- 142c 143c.... loop through GMRES cycles 144c 145 do 2000 mGMRES = 1, nGMRES 146 lGMRES = mGMRES - 1 147c 148 if (lGMRES .gt. 0) then 149c 150c.... if GMRES restarts are necessary, calculate R - A x 151c 152c 153c.... right precondition Dy 154c 155 temp = Dy 156 157ctoomuchmem call i3Pcond (Binv, temp, ilwork, 'R_Pcond ') 158c 159c.... perform the A x product 160c 161 call Au1GMR (EGmass, temp, ilwork, iBC,iper) 162c 163c.... periodic nodes have to assemble results to their partners 164c 165 call bc3per (iBC, temp, iper, ilwork, nflow) 166c 167c.... left preconditioning 168c 169ctoomuchmem call i3Pcond (Binv, temp, ilwork, 'L_Pcond ') 170c 171c.... subtract A x from residual and calculate the norm 172c 173 temp = res - temp 174 uBrg(:,:,1) = temp 175c 176c.... calculate the norm 177c 178 temp = temp**2 179 call sumgat (temp, nflow, summed, ilwork) 180 unorm = sqrt(summed) 181c 182c.... flop count 183c 184 flops = flops + nflow*nshg+nshg 185c 186 endif 187c 188c.... set up RHS of the Hessenberg's problem 189c 190 call clear (eBrg, Kspace+1) 191 eBrg(1) = unorm 192c 193c.... normalize the first Krylov vector 194c 195 uBrg(:,:,1) = uBrg(:,:,1) / unorm 196c 197c.... loop through GMRES iterations 198c 199 do 1000 iK = 1, Kspace 200 iKs = iK 201 202 uBrg(:,:,iKs+1) = uBrg(:,:,iKs) 203c 204c.... right EBE precondition the LHS ( u_{i+1} <-- inverse(U) u_i ) 205c 206ctoomuchmem call i3Pcond (Binv, uBrg(:,:,iKs+1), ilwork, 'R_Pcond ') 207c 208c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 209c 210 call Au1GMR ( EGmass, uBrg(:,:,iKs+1), ilwork, iBC,iper) 211c 212c.... periodic nodes have to assemble results to their partners 213c 214 call bc3per (iBC, uBrg(:,:,iKs+1), iper, ilwork, nflow) 215 216c 217c.... left EBE precondition the LHS ( u_{i+1} <-- inverse(L) u_{i+1} ) 218c 219ctoomuchmem call i3Pcond (Binv, uBrg(:,:,iKs+1), ilwork, 'L_Pcond ') 220c 221c.... orthogonalize and get the norm 222c 223 do jK = 1, iKs+1 224c 225 if (jK .eq. 1) then 226c 227 temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector 228 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1) 229c 230 else 231c 232c project off jK-1 vector 233c 234 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) - beta * uBrg(:,:,jK-1) 235c 236 temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector 237 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j) 238c 239 endif 240c 241 HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix 242c 243 enddo 244c 245 flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp 246c 247c the last inner product was with what was left of the vector (after 248c projecting off all of the previous vectors 249c 250 unorm = sqrt(beta) 251 HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band 252c 253c.... normalize the Krylov vector 254c 255 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov 256c vector 257c 258c.... construct and reduce the Hessenberg Matrix 259c since there is only one subdiagonal we can use a Givens rotation to 260c rotate off each subdiagonal AS IT IS FORMED. We do this because it 261c allows us to check progress of solution and quit when satisfied. Note 262c that all future K vects will put a subdiagonal in the next column so 263c there is no penalty to work ahead as the rotation for the next vector 264c will be unaffected by this rotation. 265 266c 267c H Y = E ========> R_i H Y = R_i E 268c 269 do jK = 1, iKs-1 270 tmp = Rcos(jK) * HBrg(jK, iKs) + 271 & Rsin(jK) * HBrg(jK+1,iKs) 272 HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK, iKs) + 273 & Rcos(jK) * HBrg(jK+1,iKs) 274 HBrg(jK, iKs) = tmp 275 enddo 276c 277 tmp = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2) 278 Rcos(iKs) = HBrg(iKs, iKs) / tmp 279 Rsin(iKs) = HBrg(iKs+1,iKs) / tmp 280 HBrg(iKs, iKs) = tmp 281 HBrg(iKs+1,iKs) = zero 282c 283c.... rotate eBrg R_i E 284c 285 tmp = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1) 286 eBrg(iKs+1) =-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1) 287 eBrg(iKs) = tmp 288c 289c.... check for convergence 290c 291 ntotGM = ntotGM + 1 292 echeck=abs(eBrg(iKs+1)) 293 if (echeck .le. epsnrm) exit 294c 295c.... end of GMRES iteration loop 296c 297 1000 continue 298c 299c.... -------------------------> Solution <------------------------ 300c 301c.... if converged or end of Krylov space 302c 303c.... solve for yBrg 304c 305 do jK = iKs, 1, -1 306 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 307 do lK = 1, jK-1 308 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 309 enddo 310 enddo 311c 312c.... update Dy 313c 314 do jK = 1, iKs 315 Dy = Dy + yBrg(jK) * uBrg(:,:,jK) 316 enddo 317c 318c.... flop count 319c 320 flops = flops + (3*iKs+1)*nflow*nshg 321c 322c.... check for convergence 323c 324 325 echeck=abs(eBrg(iKs+1)) 326 if (echeck .le. epsnrm) exit 327 if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction', 328 & (one-echeck/unorm)/(one-etol)*100 329c 330c.... end of mGMRES loop 331c 332 2000 continue 333c 334c.... ------------------------> Converged <------------------------ 335c 336c.... if converged 337c 338 3000 continue 339c 340c.... back EBE precondition the results 341c 342ctoomuchmem call i3Pcond (Binv, Dy, ilwork, 'R_Pcond ') 343c 344c.... back block-diagonal precondition the results 345c 346 call i3LU (BDiag, Dy, 'backward') 347c 348c 349c.... output the statistics 350c 351 call rstat (res, ilwork) 352c 353c.... stop the timer 354c 355 3002 continue ! no solve just res. 356 call timer ('Back ') 357c 358c.... end 359c 360 return 361 end 362 363 364 365 366 367 subroutine SolGMRs(y, ac, yold, acold, 368 & x, iBC, BC, 369 & col, row, lhsk, 370 & res, BDiag, HBrg, eBrg, 371 & yBrg, Rcos, Rsin, iper, 372 & ilwork, shp, shgl, shpb, 373 & shglb, Dy, rerr) 374c 375c---------------------------------------------------------------------- 376c 377c This is the preconditioned GMRES driver routine. 378c 379c input: 380c y (nshg,ndof) : Y-variables at n+alpha_v 381c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 382c yold (nshg,ndof) : Y-variables at beginning of step 383c acold (nshg,ndof) : Primvar. accel. variable at begng step 384c x (numnp,nsd) : node coordinates 385c iBC (nshg) : BC codes 386c BC (nshg,ndofBC) : BC constraint parameters 387c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 388c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 389c yBrg (Kspace) : solution of Hessenberg minim. problem 390c Rcos (Kspace) : Rotational cosine of QR algorithm 391c Rsin (Kspace) : Rotational sine of QR algorithm 392c shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 393c shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 394c 395c output: 396c res (nshg,nflow) : preconditioned residual 397c BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 398c 399c 400c Zdenek Johan, Winter 1991. (Fortran 90) 401c---------------------------------------------------------------------- 402c 403 use pointer_data 404 405 include "common.h" 406 include "mpif.h" 407 include "auxmpi.h" 408c 409 integer col(nshg+1), row(nnz*nshg) 410 real*8 lhsK(nflow*nflow,nnz_tot) 411 412 413 dimension y(nshg,ndof), ac(nshg,ndof), 414 & yold(nshg,ndof), acold(nshg,ndof), 415 & x(numnp,nsd), 416 & iBC(nshg), BC(nshg,ndofBC), 417 & res(nshg,nflow), 418 & BDiag(nshg,nflow,nflow), 419 & HBrg(Kspace+1,Kspace), eBrg(Kspace+1), 420 & yBrg(Kspace), Rcos(Kspace), 421 & Rsin(Kspace), ilwork(nlwork), 422 & iper(nshg) 423c 424 dimension Dy(nshg,nflow), rmes(nshg,nflow), 425 & temp(nshg,nflow), 426 & uBrg(nshg,nflow,Kspace+1) 427c 428 dimension shp(MAXTOP,maxsh,MAXQPT), 429 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 430 & shpb(MAXTOP,maxsh,MAXQPT), 431 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 432 real*8 rerr(nshg,10) 433c 434c 435c.... *******************>> Element Data Formation <<****************** 436c 437c 438c.... set the parameters for flux and surface tension calculations 439c 440c 441 idflx = 0 442 if(idiff >= 1) idflx= idflx + (nflow-1) * nsd 443 if (isurf == 1) idflx=idflx + nsd 444c 445c.... form the LHS matrices, the residual vector, and the block 446c diagonal preconditioner 447c 448 call ElmGMRs(y, ac, x, 449 & shp, shgl, iBC, 450 & BC, shpb, 451 & shglb, res, 452 & rmes, BDiag, iper, 453 & ilwork, lhsK, col, 454 & row, rerr ) 455 456 call tnanq(res,5, 'res_egmr') 457 call tnanq(BDiag,25, 'bdg_egmr') 458c 459c.... **********************>> EBE - GMRES <<******************** 460c 461 call timer ('Solver ') 462c 463c.... ------------------------> Initialization <----------------------- 464c 465c 466c.... LU decompose the block diagonals 467c 468 if (iprec .ne. 0) then 469 call i3LU (BDiag, res, 'LU_Fact ') 470 if (numpe > 1) then 471 call commu (BDiag , ilwork, nflow*nflow , 'out') 472 endif 473 endif 474c 475c.... block diagonal precondition residual 476c 477 call i3LU (BDiag, res, 'forward ') 478c 479c Check the residual for divering trend 480c 481 call rstatCheck(res,ilwork,y,ac) 482c 483c.... initialize Dy 484c 485 Dy = zero 486c 487c.... Pre-precondition the LHS mass matrix and set up the sparse 488c preconditioners 489c 490 491 if(lhs.eq.1) call Spsi3pre (BDiag, lhsK, col, row) 492c 493c.... copy res in uBrg(1) 494c 495 uBrg(:,:,1) = res 496c 497c.... calculate norm of residual 498c 499 temp = res**2 500 501 call sumgat (temp, nflow, summed, ilwork) 502 unorm = sqrt(summed) 503c 504c.... check if GMRES iterations are required 505c 506 iKs = 0 507 lGMRES = 0 508c 509c.... if we are down to machine precision, don't bother solving 510c 511 if (unorm .lt. 100.*epsM**2) goto 3000 512c 513c.... set up tolerance of the Hessenberg's problem 514c 515 epsnrm = etol * unorm 516c 517c.... ------------------------> GMRES Loop <------------------------- 518c 519c.... loop through GMRES cycles 520c 521 do 2000 mGMRES = 1, nGMRES 522 lGMRES = mGMRES - 1 523c 524 if (lGMRES .gt. 0) then 525c 526c.... if GMRES restarts are necessary, calculate R - A x 527c 528c 529c.... right precondition Dy 530c 531 temp = Dy 532 533c 534c.... perform the A x product 535c 536 call SparseAp (iper,ilwork,iBC, col, row, lhsK, temp) 537 call tnanq(temp,5, 'q_spAPrs') 538 539c 540c.... periodic nodes have to assemble results to their partners 541c 542 call bc3per (iBC, temp, iper, ilwork, nflow) 543 call tnanq(temp,5, 'q_BCprs') 544c 545c.... subtract A x from residual and calculate the norm 546c 547 temp = res - temp 548 uBrg(:,:,1) = temp 549c 550c.... calculate the norm 551c 552 temp = temp**2 553 call sumgat (temp, nflow, summed, ilwork) 554 unorm = sqrt(summed) 555c 556c.... flop count 557c 558 flops = flops + nflow*nshg+nshg 559c 560 endif 561c 562c.... set up RHS of the Hessenberg's problem 563c 564 call clear (eBrg, Kspace+1) 565 eBrg(1) = unorm 566c 567c.... normalize the first Krylov vector 568c 569 uBrg(:,:,1) = uBrg(:,:,1) / unorm 570c 571c.... loop through GMRES iterations 572c 573 do 1000 iK = 1, Kspace 574 iKs = iK 575 576 uBrg(:,:,iKs+1) = uBrg(:,:,iKs) 577c 578c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 579c 580 call SparseAp (iper, ilwork, iBC, 581 & col, row, lhsK, 582 & uBrg(:,:,iKs+1) ) 583 call tnanq(uBrg(:,:,iKS+1),5, 'q_spAP') 584 585c 586c.... periodic nodes have to assemble results to their partners 587c 588 call bc3per (iBC, uBrg(:,:,iKs+1), iper, ilwork, nflow) 589 call tnanq(uBrg(:,:,iKS+1),5, 'q_bc') 590 591c 592c.... orthogonalize and get the norm 593c 594 do jK = 1, iKs+1 595c 596 if (jK .eq. 1) then 597c 598 temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector 599 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1) 600c 601 else 602c 603c project off jK-1 vector 604c 605 uBrg(:,:,iKs+1)=uBrg(:,:,iKs+1)-beta * uBrg(:,:,jK-1) 606c 607 temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector 608 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j) 609c 610 endif 611c 612 HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix 613c 614 enddo 615c 616 flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp 617c 618c the last inner product was with what was left of the vector (after 619c projecting off all of the previous vectors 620c 621 if(beta.le.0) write(*,*) 'beta in solgrm non-positive' 622 unorm = sqrt(beta) 623 HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band 624c 625c.... normalize the Krylov vector 626c 627 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov 628c vector 629c 630c.... construct and reduce the Hessenberg Matrix 631c since there is only one subdiagonal we can use a Givens rotation to 632c rotate off each subdiagonal AS IT IS FORMED. We do this because it 633c allows us to check progress of solution and quit when satisfied. Note 634c that all future K vects will put a subdiagonal in the next column so 635c there is no penalty to work ahead as the rotation for the next vector 636c will be unaffected by this rotation. 637 638c 639c H Y = E ========> R_i H Y = R_i E 640c 641 do jK = 1, iKs-1 642 tmp = Rcos(jK) * HBrg(jK, iKs) + 643 & Rsin(jK) * HBrg(jK+1,iKs) 644 HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK, iKs) + 645 & Rcos(jK) * HBrg(jK+1,iKs) 646 HBrg(jK, iKs) = tmp 647 enddo 648c 649 tmp = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2) 650 Rcos(iKs) = HBrg(iKs, iKs) / tmp 651 Rsin(iKs) = HBrg(iKs+1,iKs) / tmp 652 HBrg(iKs, iKs)= tmp 653 HBrg(iKs+1,iKs)= zero 654c 655c.... rotate eBrg R_i E 656c 657 tmp = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1) 658 eBrg(iKs+1)=-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1) 659 eBrg(iKs) = tmp 660c 661c.... check for convergence 662c 663 ntotGM = ntotGM + 1 664 echeck=abs(eBrg(iKs+1)) 665 if (echeck .le. epsnrm.and. iKs .ge. minIters) exit 666c 667c.... end of GMRES iteration loop 668c 669 1000 continue 670c 671c.... -------------------------> Solution <------------------------ 672c 673c.... if converged or end of Krylov space 674c 675c.... solve for yBrg 676c 677 do jK = iKs, 1, -1 678 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 679 do lK = 1, jK-1 680 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 681 enddo 682 enddo 683c 684c.... update Dy 685c 686 do jK = 1, iKs 687 Dy = Dy + yBrg(jK) * uBrg(:,:,jK) 688 enddo 689c 690c.... flop count 691c 692 flops = flops + (3*iKs+1)*nflow*nshg 693c 694c.... check for convergence 695c 696 echeck=abs(eBrg(iKs+1)) 697 if (echeck .le. epsnrm) exit 698 if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction', 699 & (one-echeck*etol/epsnrm)/(one-etol)*100 700 701c 702c.... end of mGMRES loop 703c 704 2000 continue 705c 706c.... ------------------------> Converged <------------------------ 707c 708c.... if converged 709c 710 3000 continue 711 712c 713c 714c.... back block-diagonal precondition the results 715c 716 call i3LU (BDiag, Dy, 'backward') 717c 718c 719c.... output the statistics 720c 721 call rstat (res, ilwork) 722c 723c.... stop the timer 724c 725 3002 continue ! no solve just res. 726 call timer ('Back ') 727c 728c.... end 729c 730 return 731 end 732 733 subroutine SolGMRSclr(y, ac, yold, 734 & acold, EGmasst, 735 & x, elDw, 736 & iBC, BC, 737 & rest, HBrg, eBrg, 738 & yBrg, Rcos, Rsin, iper, 739 & ilwork, 740 & shp, shgl, 741 & shpb, shglb, Dyt) 742c 743c---------------------------------------------------------------------- 744c 745c This is the preconditioned GMRES driver routine. 746c 747c input: 748c y (nshg,ndof) : Y-variables at n+alpha_f 749c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 750c yold (nshg,ndof) : Y-variables at beginning of step 751c acold (nshg,ndof) : Primvar. accel. variable at begng step 752c x (numnp,nsd) : node coordinates 753c iBC (nshg) : BC codes 754c BC (nshg,ndofBC) : BC constraint parameters 755c iper (nshg) : periodic nodal information 756c 757c output: 758c y (nshg,ndof) : Y-variables at n+alpha_f 759c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 760c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 761c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 762c yBrg (Kspace) : solution of Hessenberg minim. problem 763c Rcos (Kspace) : Rotational cosine of QR algorithm 764c Rsin (Kspace) : Rotational sine of QR algorithm 765c output: 766c rest (numnp) : preconditioned residual 767c 768c 769c Zdenek Johan, Winter 1991. (Fortran 90) 770c---------------------------------------------------------------------- 771c 772 use pointer_data 773 774 include "common.h" 775 include "mpif.h" 776 include "auxmpi.h" 777c 778 dimension y(nshg,ndof), ac(nshg,ndof), 779 & yold(nshg,ndof), acold(nshg,ndof), 780 & x(numnp,nsd), 781 & iBC(nshg), BC(nshg,ndofBC), 782 & rest(nshg), 783 & Diag(nshg), 784 & HBrg(Kspace+1,*), eBrg(*), 785 & yBrg(*), Rcos(*), 786 & Rsin(*), ilwork(nlwork), 787 & iper(nshg), EGmasst(numel,nshape,nshape) 788c 789 dimension Dyt(nshg), rmest(nshg), 790 & tempt(nshg), Dinv(nshg), 791 & uBrgt(nshg,Kspace+1) 792c 793 dimension shp(MAXTOP,maxsh,MAXQPT), 794 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 795 & shpb(MAXTOP,maxsh,MAXQPT), 796 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 797 real*8 elDw(numel) 798c 799c.... *******************>> Element Data Formation <<****************** 800c 801c 802c.... form the LHS matrices, the residual vector, and the block 803c diagonal preconditioner 804c 805 call ElmGMRSclr(y, ac, 806 & x, elDw, shp, shgl, 807 & iBC, BC, 808 & shpb, shglb, 809 & rest, 810 & rmest, Diag, iper, 811 & ilwork, EGmasst) 812c 813c.... **********************>> EBE - GMRES <<******************** 814c 815 call timer ('Solver ') 816c 817c.... ------------------------> Initialization <----------------------- 818c 819c 820 id = isclr+5 821c.... initialize Dy 822c 823 Dyt = zero 824c 825c.... Right preconditioner 826c 827 call i3preSclr(Diag, Dinv, EGmassT, ilwork) 828c 829c Check the residual for divering trend 830c 831 call rstatCheckSclr(rest,ilwork,y,ac) 832 833c 834c.... copy rest in uBrgt(1) 835c 836 uBrgt(:,1) = rest 837c 838c.... calculate norm of residual 839c 840 tempt = rest**2 841 842 call sumgat (tempt, 1, summed, ilwork) 843 unorm = sqrt(summed) 844c 845c.... check if GMRES iterations are required 846c 847 iKst = 0 848 lGMRESt = 0 849c 850c.... if we are down to machine precision, don't bother solving 851c 852 if (unorm .lt. 100.*epsM**2) goto 3000 853c 854c.... set up tolerance of the Hessenberg's problem 855c 856 epsnrm = etol * unorm 857c 858c.... ------------------------> GMRES Loop <------------------------- 859c 860c.... loop through GMRES cycles 861c 862 do 2000 mGMRES = 1, nGMRES 863 lGMRESt = mGMRES - 1 864c 865 if (lGMRESt .gt. 0) then 866c 867c.... if GMRES restarts are necessary, calculate R - A x 868c 869c 870 871c.... perform the A x product 872c 873 call Au1GMRSclr (EGmasst, tempt, ilwork, iper) 874c 875c.... periodic nodes have to assemble results to their partners 876c 877c call bc3perSclr (iBC, tempt, iper) 878c 879c.... subtract A x from residual and calculate the norm 880c 881 tempt = rest - tempt 882 uBrgt(:,1) = tempt 883c 884c.... calculate the norm 885c 886 tempt = tempt**2 887 call sumgat (tempt, 1, summed, ilwork) 888 unorm = sqrt(summed) 889c 890c.... flop count 891c 892 flops = flops + ndof*numnp+numnp 893c 894 endif 895c 896c.... set up RHS of the Hessenberg's problem 897c 898 call clear (eBrg, Kspace+1) 899 call clear (HBrg, Kspace+1) 900 eBrg(1) = unorm 901c 902c.... normalize the first Krylov vector 903c 904 uBrgt(:,1) = uBrgt(:,1) / unorm 905c 906c.... loop through GMRES iterations 907c 908 do 1000 iK = 1, Kspace 909 iKst = iK 910 911 uBrgt(:,iKst+1) = uBrgt(:,iKst) 912 913c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 914c 915 call Au1GMRSclr ( EGmasst, uBrgt(:,iKst+1), ilwork, iper ) 916 917c 918c.... periodic nodes have to assemble results to their partners 919c 920 call bc3perSclr (iBC, uBrgt(:,iKst+1), iper) 921c 922c.... orthogonalize and get the norm 923c 924 do jK = 1, iKst+1 925c 926 if (jK .eq. 1) then 927c 928 tempt = uBrgt(:,iKst+1) * uBrgt(:,1) ! {u_{i+1}*u_1} vector 929 call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_1) 930c 931 else 932c 933c project off jK-1 vector 934c 935 uBrgt(:,iKst+1) = uBrgt(:,iKst+1) - beta * uBrgt(:,jK-1) 936c 937 tempt = uBrgt(:,iKst+1) * uBrgt(:,jK) !{u_{i+1}*u_j} vector 938 call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_j) 939c 940 endif 941c 942 HBrg(jK,iKst) = beta ! put this in the Hessenberg Matrix 943c 944 enddo 945c 946 flops = flops + (3*iKst+1)*ndof*numnp+(iKst+1)*numnp 947c 948c the last inner product was with what was left of the vector (after 949c projecting off all of the previous vectors 950c 951 unorm = sqrt(beta) 952 HBrg(iKst+1,iKst) = unorm ! this fills the 1 sub diagonal band 953c 954c.... normalize the Krylov vector 955c 956 uBrgt(:,iKst+1) = uBrgt(:,iKst+1) / unorm ! normalize the next Krylov 957c vector 958c 959c.... construct and reduce the Hessenberg Matrix 960c since there is only one subdiagonal we can use a Givens rotation to 961c rotate off each subdiagonal AS IT IS FORMED. We do this because it 962c allows us to check progress of solution and quit when satisfied. Note 963c that all future K vects will put a subdiagonal in the next column so 964c there is no penalty to work ahead as the rotation for the next vector 965c will be unaffected by this rotation. 966 967c 968c H Y = E ========> R_i H Y = R_i E 969c 970 do jK = 1, iKst-1 971 tmp = Rcos(jK) * HBrg(jK, iKst) + 972 & Rsin(jK) * HBrg(jK+1,iKst) 973 HBrg(jK+1,iKst) = -Rsin(jK) * HBrg(jK, iKst) + 974 & Rcos(jK) * HBrg(jK+1,iKst) 975 HBrg(jK, iKst) = tmp 976 enddo 977c 978 tmp = sqrt(HBrg(iKst,iKst)**2 + HBrg(iKst+1,iKst)**2) 979 Rcos(iKst) = HBrg(iKst, iKst) / tmp 980 Rsin(iKst) = HBrg(iKst+1,iKst) / tmp 981 HBrg(iKst, iKst) = tmp 982 HBrg(iKst+1,iKst) = zero 983c 984c.... rotate eBrg R_i E 985c 986 tmp = Rcos(iKst)*eBrg(iKst) + Rsin(iKst)*eBrg(iKst+1) 987 eBrg(iKst+1)=-Rsin(iKst)*eBrg(iKst) + Rcos(iKst)*eBrg(iKst+1) 988 eBrg(iKst) = tmp 989c 990c.... check for convergence 991c 992 ercheck=eBrg(iKst+1) 993 ntotGMt = ntotGMt + 1 994 if (abs(eBrg(iKst+1)) .le. epsnrm) exit 995c 996c.... end of GMRES iteration loop 997c 998 1000 continue 999c 1000c.... -------------------------> Solution <------------------------ 1001c 1002c.... if converged or end of Krylov space 1003c 1004c.... solve for yBrg 1005c 1006 do jK = iKst, 1, -1 1007 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 1008 do lK = 1, jK-1 1009 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 1010 enddo 1011 enddo 1012c 1013c.... update Dy 1014c 1015 do jK = 1, iKst 1016 Dyt = Dyt + yBrg(jK) * uBrgt(:,jK) 1017 enddo 1018c 1019c.... flop count 1020c 1021 flops = flops + (3*iKst+1)*ndof*numnp 1022c 1023c.... check for convergence 1024c 1025 if (abs(eBrg(iKst+1)) .le. epsnrm) exit 1026c 1027c.... end of mGMRES loop 1028c 1029 2000 continue 1030c 1031c.... ------------------------> Converged <------------------------ 1032c 1033c.... if converged 1034c 1035 3000 continue 1036c 1037c.... back precondition the result 1038c 1039 Dyt(:) = Dyt(:) * Dinv(:) 1040c 1041c.... output the statistics 1042c 1043 call rstatSclr( rest, ilwork,lgmrest,iKst) 1044c.... stop the timer 1045c 1046 3002 continue ! no solve just res. 1047 call timer ('Back ') 1048c 1049c.... end 1050c 1051 return 1052 end 1053 1054 1055