1 subroutine SolGMRe (y, ac, yold, acold, 2 & x, iBC, BC, EGmass, 3 & res, BDiag, HBrg, eBrg, 4 & yBrg, Rcos, Rsin, iper, 5 & ilwork, shp, shgl, shpb, 6 & shglb, Dy, rerr) 7c 8c---------------------------------------------------------------------- 9c 10c This is the preconditioned GMRES driver routine. 11c 12c input: 13c y (nshg,ndof) : Y-variables at n+alpha_v 14c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 15c yold (nshg,ndof) : Y-variables at beginning of step 16c acold (nshg,ndof) : Primvar. accel. variable at begng step 17c x (numnp,nsd) : node coordinates 18c iBC (nshg) : BC codes 19c BC (nshg,ndofBC) : BC constraint parameters 20c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 21c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 22c yBrg (Kspace) : solution of Hessenberg minim. problem 23c Rcos (Kspace) : Rotational cosine of QR algorithm 24c Rsin (Kspace) : Rotational sine of QR algorithm 25c shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 26c shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 27c 28c output: 29c res (nshg,nflow) : preconditioned residual 30c BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 31c 32c 33c Zdenek Johan, Winter 1991. (Fortran 90) 34c---------------------------------------------------------------------- 35c 36 use pointer_data 37 38 include "common.h" 39 include "mpif.h" 40 include "auxmpi.h" 41c 42 dimension y(nshg,ndof), ac(nshg,ndof), 43 & yold(nshg,ndof), acold(nshg,ndof), 44 & x(numnp,nsd), 45 & iBC(nshg), BC(nshg,ndofBC), 46 & res(nshg,nflow), 47 & BDiag(nshg,nflow,nflow), 48 & HBrg(Kspace+1,*), eBrg(*), 49 & yBrg(*), Rcos(*), 50 & Rsin(*), ilwork(nlwork), 51 & iper(nshg), EGmass(numel,nedof,nedof)!, 52ctoomuchmem & Binv(numel,nedof,nedof) 53c 54 dimension Dy(nshg,nflow), rmes(nshg,nflow), 55 & temp(nshg,nflow), 56 & uBrg(nshg,nflow,Kspace+1) 57c 58 dimension shp(MAXTOP,maxsh,MAXQPT), 59 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 60 & shpb(MAXTOP,maxsh,MAXQPT), 61 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 62 real*8 rerr(nshg,10) 63c 64c.... *******************>> Element Data Formation <<****************** 65c 66c 67c.... set the parameters for flux and surface tension calculations 68c 69c 70 idflx = 0 71 if(idiff >= 1) idflx= idflx + (nflow-1) * nsd 72 if (isurf == 1) idflx=idflx + nsd 73c 74c.... form the LHS matrices, the residual vector, and the block 75c diagonal preconditioner 76c 77 call ElmGMRe(y, ac, x, 78 & shp, shgl, iBC, 79 & BC, shpb, 80 & shglb, res, 81 & rmes, BDiag, iper, 82 & ilwork, EGmass, rerr ) 83c 84c.... **********************>> EBE - GMRES <<******************** 85c 86 call timer ('Solver ') 87c 88c.... ------------------------> Initialization <----------------------- 89c 90c 91c.... LU decompose the block diagonals 92c 93 if (iprec .ne. 0) 94 & call i3LU (BDiag, res, 'LU_Fact ') 95 96c 97c.... block diagonal precondition residual 98c 99 call i3LU (BDiag, res, 'forward ') 100c 101c.... initialize Dy 102c 103 Dy = zero 104c 105c.... Pre-precondition the LHS mass matrix and set up the element 106c by element preconditioners 107c 108ctoomuchmemory note that Binv is demoted from huge array to just one 109c real*8 in i3pre because it takes too much memory 110 111 call i3pre (BDiag, Binv, EGmass, ilwork) 112c 113c.... left EBE precondition the residual 114c 115ctoomuchmem call i3Pcond (Binv, res, ilwork, 'L_Pcond ') 116c 117c.... copy res in uBrg(1) 118c 119 uBrg(:,:,1) = res 120c 121c.... calculate norm of residual 122c 123 temp = res**2 124 125 call sumgat (temp, nflow, summed, ilwork) 126 unorm = sqrt(summed) 127c 128c.... check if GMRES iterations are required 129c 130 iKs = 0 131 lGMRES = 0 132c 133c.... if we are down to machine precision, don't bother solving 134c 135 if (unorm .lt. 100.*epsM**2) goto 3000 136c 137c.... set up tolerance of the Hessenberg's problem 138c 139 epsnrm = etol * unorm 140c 141c.... ------------------------> GMRES Loop <------------------------- 142c 143c.... loop through GMRES cycles 144c 145 do 2000 mGMRES = 1, nGMRES 146 lGMRES = mGMRES - 1 147c 148 if (lGMRES .gt. 0) then 149c 150c.... if GMRES restarts are necessary, calculate R - A x 151c 152c 153c.... right precondition Dy 154c 155 temp = Dy 156 157ctoomuchmem call i3Pcond (Binv, temp, ilwork, 'R_Pcond ') 158c 159c.... perform the A x product 160c 161 call Au1GMR (EGmass, temp, ilwork, iBC,iper) 162c 163c.... periodic nodes have to assemble results to their partners 164c 165 call bc3per (iBC, temp, iper, ilwork, nflow) 166c 167c.... left preconditioning 168c 169ctoomuchmem call i3Pcond (Binv, temp, ilwork, 'L_Pcond ') 170c 171c.... subtract A x from residual and calculate the norm 172c 173 temp = res - temp 174 uBrg(:,:,1) = temp 175c 176c.... calculate the norm 177c 178 temp = temp**2 179 call sumgat (temp, nflow, summed, ilwork) 180 unorm = sqrt(summed) 181c 182c.... flop count 183c 184 ! flops = flops + nflow*nshg+nshg 185c 186 endif 187c 188c.... set up RHS of the Hessenberg's problem 189c 190 call clear (eBrg, Kspace+1) 191 eBrg(1) = unorm 192c 193c.... normalize the first Krylov vector 194c 195 uBrg(:,:,1) = uBrg(:,:,1) / unorm 196c 197c.... loop through GMRES iterations 198c 199 do 1000 iK = 1, Kspace 200 iKs = iK 201 202 uBrg(:,:,iKs+1) = uBrg(:,:,iKs) 203c 204c.... right EBE precondition the LHS ( u_{i+1} <-- inverse(U) u_i ) 205c 206ctoomuchmem call i3Pcond (Binv, uBrg(:,:,iKs+1), ilwork, 'R_Pcond ') 207c 208c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 209c 210 call Au1GMR ( EGmass, uBrg(:,:,iKs+1), ilwork, iBC,iper) 211c 212c.... periodic nodes have to assemble results to their partners 213c 214 call bc3per (iBC, uBrg(:,:,iKs+1), iper, ilwork, nflow) 215 216c 217c.... left EBE precondition the LHS ( u_{i+1} <-- inverse(L) u_{i+1} ) 218c 219ctoomuchmem call i3Pcond (Binv, uBrg(:,:,iKs+1), ilwork, 'L_Pcond ') 220c 221c.... orthogonalize and get the norm 222c 223 do jK = 1, iKs+1 224c 225 if (jK .eq. 1) then 226c 227 temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector 228 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1) 229c 230 else 231c 232c project off jK-1 vector 233c 234 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) - beta * uBrg(:,:,jK-1) 235c 236 temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector 237 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j) 238c 239 endif 240c 241 HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix 242c 243 enddo 244c 245 ! flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp 246c 247c the last inner product was with what was left of the vector (after 248c projecting off all of the previous vectors 249c 250 unorm = sqrt(beta) 251 HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band 252c 253c.... normalize the Krylov vector 254c 255 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov 256c vector 257c 258c.... construct and reduce the Hessenberg Matrix 259c since there is only one subdiagonal we can use a Givens rotation to 260c rotate off each subdiagonal AS IT IS FORMED. We do this because it 261c allows us to check progress of solution and quit when satisfied. Note 262c that all future K vects will put a subdiagonal in the next column so 263c there is no penalty to work ahead as the rotation for the next vector 264c will be unaffected by this rotation. 265 266c 267c H Y = E ========> R_i H Y = R_i E 268c 269 do jK = 1, iKs-1 270 tmp = Rcos(jK) * HBrg(jK, iKs) + 271 & Rsin(jK) * HBrg(jK+1,iKs) 272 HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK, iKs) + 273 & Rcos(jK) * HBrg(jK+1,iKs) 274 HBrg(jK, iKs) = tmp 275 enddo 276c 277 tmp = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2) 278 Rcos(iKs) = HBrg(iKs, iKs) / tmp 279 Rsin(iKs) = HBrg(iKs+1,iKs) / tmp 280 HBrg(iKs, iKs) = tmp 281 HBrg(iKs+1,iKs) = zero 282c 283c.... rotate eBrg R_i E 284c 285 tmp = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1) 286 eBrg(iKs+1) =-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1) 287 eBrg(iKs) = tmp 288c 289c.... check for convergence 290c 291 ntotGM = ntotGM + 1 292 echeck=abs(eBrg(iKs+1)) 293 if (echeck .le. epsnrm) exit 294c 295c.... end of GMRES iteration loop 296c 297 1000 continue 298c 299c.... -------------------------> Solution <------------------------ 300c 301c.... if converged or end of Krylov space 302c 303c.... solve for yBrg 304c 305 do jK = iKs, 1, -1 306 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 307 do lK = 1, jK-1 308 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 309 enddo 310 enddo 311c 312c.... update Dy 313c 314 do jK = 1, iKs 315 Dy = Dy + yBrg(jK) * uBrg(:,:,jK) 316 enddo 317c 318c.... flop count 319c 320 ! flops = flops + (3*iKs+1)*nflow*nshg 321c 322c.... check for convergence 323c 324 325 echeck=abs(eBrg(iKs+1)) 326 if (echeck .le. epsnrm) exit 327 if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction', 328 & (one-echeck/unorm)/(one-etol)*100 329c 330c.... end of mGMRES loop 331c 332 2000 continue 333c 334c.... ------------------------> Converged <------------------------ 335c 336c.... if converged 337c 338 3000 continue 339c 340c.... back EBE precondition the results 341c 342ctoomuchmem call i3Pcond (Binv, Dy, ilwork, 'R_Pcond ') 343c 344c.... back block-diagonal precondition the results 345c 346 call i3LU (BDiag, Dy, 'backward') 347c 348c 349c.... output the statistics 350c 351 call rstat (res, ilwork) 352c 353c.... stop the timer 354c 355 3002 continue ! no solve just res. 356 call timer ('Back ') 357c 358c.... end 359c 360 return 361 end 362 363 364 365 366 367 subroutine SolGMRs(y, ac, yold, acold, 368 & x, iBC, BC, 369 & col, row, lhsk, 370 & res, BDiag, HBrg, eBrg, 371 & yBrg, Rcos, Rsin, iper, 372 & ilwork, shp, shgl, shpb, 373 & shglb, Dy, rerr) 374c 375c---------------------------------------------------------------------- 376c 377c This is the preconditioned GMRES driver routine. 378c 379c input: 380c y (nshg,ndof) : Y-variables at n+alpha_v 381c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 382c yold (nshg,ndof) : Y-variables at beginning of step 383c acold (nshg,ndof) : Primvar. accel. variable at begng step 384c x (numnp,nsd) : node coordinates 385c iBC (nshg) : BC codes 386c BC (nshg,ndofBC) : BC constraint parameters 387c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 388c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 389c yBrg (Kspace) : solution of Hessenberg minim. problem 390c Rcos (Kspace) : Rotational cosine of QR algorithm 391c Rsin (Kspace) : Rotational sine of QR algorithm 392c shp(b) (nen,maxsh,melCat) : element shape functions (boundary) 393c shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions 394c 395c output: 396c res (nshg,nflow) : preconditioned residual 397c BDiag (nshg,nflow,nflow) : block-diagonal preconditioner 398c 399c 400c Zdenek Johan, Winter 1991. (Fortran 90) 401c---------------------------------------------------------------------- 402c 403 use pointer_data 404 405 include "common.h" 406 include "mpif.h" 407 include "auxmpi.h" 408c 409 integer col(nshg+1), row(nnz*nshg) 410 real*8 lhsK(nflow*nflow,nnz_tot) 411 412 413 dimension y(nshg,ndof), ac(nshg,ndof), 414 & yold(nshg,ndof), acold(nshg,ndof), 415 & x(numnp,nsd), 416 & iBC(nshg), BC(nshg,ndofBC), 417 & res(nshg,nflow), 418 & BDiag(nshg,nflow,nflow), 419 & HBrg(Kspace+1,Kspace), eBrg(Kspace+1), 420 & yBrg(Kspace), Rcos(Kspace), 421 & Rsin(Kspace), ilwork(nlwork), 422 & iper(nshg) 423c 424 dimension Dy(nshg,nflow), rmes(nshg,nflow), 425 & temp(nshg,nflow), 426 & uBrg(nshg,nflow,Kspace+1) 427c 428 dimension shp(MAXTOP,maxsh,MAXQPT), 429 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 430 & shpb(MAXTOP,maxsh,MAXQPT), 431 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 432 real*8 rerr(nshg,10) 433c 434c 435c.... *******************>> Element Data Formation <<****************** 436c 437c 438c.... set the parameters for flux and surface tension calculations 439c 440c 441 idflx = 0 442 if(idiff >= 1) idflx= idflx + (nflow-1) * nsd 443 if (isurf == 1) idflx=idflx + nsd 444c 445c.... form the LHS matrices, the residual vector, and the block 446c diagonal preconditioner 447c 448 call ElmGMRs(y, ac, x, 449 & shp, shgl, iBC, 450 & BC, shpb, 451 & shglb, res, 452 & rmes, BDiag, iper, 453 & ilwork, lhsK, col, 454 & row, rerr ) 455 call rstat (res, ilwork) 456 if(ntotGM.eq.0) resfrt=zero !don't let this mess up scaled dB 457 if(myrank.eq.master) then 458 write(*,*)'residual prior to sbd-preconditioning' 459 endif 460c 461 462 call tnanq(res,5, 'res_egmr') 463 call tnanq(BDiag,25, 'bdg_egmr') 464c 465c.... **********************>> EBE - GMRES <<******************** 466c 467 call timer ('Solver ') 468c 469c.... ------------------------> Initialization <----------------------- 470c 471c 472c.... LU decompose the block diagonals 473c 474 if (iprec .ne. 0) then 475 call i3LU (BDiag, res, 'LU_Fact ') 476 if (numpe > 1) then 477 call commu (BDiag , ilwork, nflow*nflow , 'out') 478 endif 479 endif 480c 481c.... block diagonal precondition residual 482c 483 call i3LU (BDiag, res, 'forward ') 484c 485c Check the residual for divering trend 486c 487 call rstatCheck(res,ilwork,y,ac) 488c 489c.... initialize Dy 490c 491 Dy = zero 492c 493c.... Pre-precondition the LHS mass matrix and set up the sparse 494c preconditioners 495c 496 497 if(lhs.eq.1) call Spsi3pre (BDiag, lhsK, col, row) 498c 499c.... copy res in uBrg(1) 500c 501 uBrg(:,:,1) = res 502c 503c.... calculate norm of residual 504c 505 temp = res**2 506 507 call sumgat (temp, nflow, summed, ilwork) 508 unorm = sqrt(summed) 509c 510c.... check if GMRES iterations are required 511c 512 iKs = 0 513 lGMRESs = 0 514c 515c.... if we are down to machine precision, don't bother solving 516c 517 if (unorm .lt. 100.*epsM**2) goto 3000 518c 519c.... set up tolerance of the Hessenberg's problem 520c 521 epsnrm = etol * unorm 522c 523c.... ------------------------> GMRES Loop <------------------------- 524c 525c.... loop through GMRES cycles 526c 527 do 2000 mGMRES = 1, nGMRES 528 lGMRESs = mGMRES - 1 529c 530 if (lGMRES .gt. 0) then 531c 532c.... if GMRES restarts are necessary, calculate R - A x 533c 534c 535c.... right precondition Dy 536c 537 temp = Dy 538 539c 540c.... perform the A x product 541c 542 call SparseAp (iper,ilwork,iBC, col, row, lhsK, temp) 543c call tnanq(temp,5, 'q_spAPrs') 544 545c 546c.... periodic nodes have to assemble results to their partners 547c 548 call bc3per (iBC, temp, iper, ilwork, nflow) 549c call tnanq(temp,5, 'q_BCprs') 550c 551c.... subtract A x from residual and calculate the norm 552c 553 temp = res - temp 554 uBrg(:,:,1) = temp 555c 556c.... calculate the norm 557c 558 temp = temp**2 559 call sumgat (temp, nflow, summed, ilwork) 560 unorm = sqrt(summed) 561c 562c.... flop count 563c 564 ! flops = flops + nflow*nshg+nshg 565c 566 endif 567c 568c.... set up RHS of the Hessenberg's problem 569c 570 call clear (eBrg, Kspace+1) 571 eBrg(1) = unorm 572c 573c.... normalize the first Krylov vector 574c 575 uBrg(:,:,1) = uBrg(:,:,1) / unorm 576c 577c.... loop through GMRES iterations 578c 579 do 1000 iK = 1, Kspace 580 iKs = iK 581 582 uBrg(:,:,iKs+1) = uBrg(:,:,iKs) 583c 584c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 585c 586 call SparseAp (iper, ilwork, iBC, 587 & col, row, lhsK, 588 & uBrg(:,:,iKs+1) ) 589c call tnanq(uBrg(:,:,iKS+1),5, 'q_spAP') 590 591c 592c.... periodic nodes have to assemble results to their partners 593c 594 call bc3per (iBC, uBrg(:,:,iKs+1), iper, ilwork, nflow) 595c call tnanq(uBrg(:,:,iKS+1),5, 'q_bc') 596 597c 598c.... orthogonalize and get the norm 599c 600 do jK = 1, iKs+1 601c 602 if (jK .eq. 1) then 603c 604 temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector 605 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1) 606c 607 else 608c 609c project off jK-1 vector 610c 611 uBrg(:,:,iKs+1)=uBrg(:,:,iKs+1)-beta * uBrg(:,:,jK-1) 612c 613 temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector 614 call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j) 615c 616 endif 617c 618 HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix 619c 620 enddo 621c 622 ! flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp 623c 624c the last inner product was with what was left of the vector (after 625c projecting off all of the previous vectors 626c 627 if(beta.le.0) write(*,*) 'beta in solgmr non-positive' 628 unorm = sqrt(beta) 629 HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band 630c 631c.... normalize the Krylov vector 632c 633 uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov 634c vector 635c 636c.... construct and reduce the Hessenberg Matrix 637c since there is only one subdiagonal we can use a Givens rotation to 638c rotate off each subdiagonal AS IT IS FORMED. We do this because it 639c allows us to check progress of solution and quit when satisfied. Note 640c that all future K vects will put a subdiagonal in the next column so 641c there is no penalty to work ahead as the rotation for the next vector 642c will be unaffected by this rotation. 643 644c 645c H Y = E ========> R_i H Y = R_i E 646c 647 do jK = 1, iKs-1 648 tmp = Rcos(jK) * HBrg(jK, iKs) + 649 & Rsin(jK) * HBrg(jK+1,iKs) 650 HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK, iKs) + 651 & Rcos(jK) * HBrg(jK+1,iKs) 652 HBrg(jK, iKs) = tmp 653 enddo 654c 655 tmp = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2) 656 Rcos(iKs) = HBrg(iKs, iKs) / tmp 657 Rsin(iKs) = HBrg(iKs+1,iKs) / tmp 658 HBrg(iKs, iKs)= tmp 659 HBrg(iKs+1,iKs)= zero 660c 661c.... rotate eBrg R_i E 662c 663 tmp = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1) 664 eBrg(iKs+1)=-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1) 665 eBrg(iKs) = tmp 666c 667c.... check for convergence 668c 669 ntotGM = ntotGM + 1 670 echeck=abs(eBrg(iKs+1)) 671 if (echeck .le. epsnrm.and. iKs .ge. minIters) exit 672c 673c.... end of GMRES iteration loop 674c 675 1000 continue 676c 677c.... -------------------------> Solution <------------------------ 678c 679c.... if converged or end of Krylov space 680c 681c.... solve for yBrg 682c 683 do jK = iKs, 1, -1 684 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 685 do lK = 1, jK-1 686 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 687 enddo 688 enddo 689c 690c.... update Dy 691c 692 do jK = 1, iKs 693 Dy = Dy + yBrg(jK) * uBrg(:,:,jK) 694 enddo 695c 696c.... flop count 697c 698 ! flops = flops + (3*iKs+1)*nflow*nshg 699c 700c.... check for convergence 701c 702 echeck=abs(eBrg(iKs+1)) 703 if (echeck .le. epsnrm) exit 704! if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction', 705! & (one-echeck*etol/epsnrm)/(one-etol)*100 706 707c 708c.... end of mGMRES loop 709c 710 2000 continue 711c 712c.... ------------------------> Converged <------------------------ 713c 714c.... if converged 715c 716 3000 continue 717 718c 719c 720c.... back block-diagonal precondition the results 721c 722 call i3LU (BDiag, Dy, 'backward') 723c 724c 725c.... output the statistics 726c 727 call rstat (res, ilwork) 728 729 if(myrank.eq.master) then 730 if (echeck .le. epsnrm) then 731 write(*,*) 732 else 733 write(*,*)'solver tolerance %satisfaction', 734 & (one-echeck*etol/epsnrm)/(one-etol)*100 735 endif 736 endif 737c 738c.... stop the timer 739c 740 3002 continue ! no solve just res. 741 call timer ('Back ') 742c 743c.... end 744c 745 return 746 end 747 748 subroutine SolGMRSclr(y, ac, yold, 749 & acold, EGmasst, 750 & x, elDw, 751 & iBC, BC, 752 & rest, HBrg, eBrg, 753 & yBrg, Rcos, Rsin, iper, 754 & ilwork, 755 & shp, shgl, 756 & shpb, shglb, Dyt) 757c 758c---------------------------------------------------------------------- 759c 760c This is the preconditioned GMRES driver routine. 761c 762c input: 763c y (nshg,ndof) : Y-variables at n+alpha_f 764c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 765c yold (nshg,ndof) : Y-variables at beginning of step 766c acold (nshg,ndof) : Primvar. accel. variable at begng step 767c x (numnp,nsd) : node coordinates 768c iBC (nshg) : BC codes 769c BC (nshg,ndofBC) : BC constraint parameters 770c iper (nshg) : periodic nodal information 771c 772c output: 773c y (nshg,ndof) : Y-variables at n+alpha_f 774c ac (nshg,ndof) : Primvar. accel. variable n+alpha_m 775c HBrg (Kspace+1,Kspace) : Hessenberg matrix (LHS matrix) 776c eBrg (Kspace+1) : RHS of Hessenberg minim. problem 777c yBrg (Kspace) : solution of Hessenberg minim. problem 778c Rcos (Kspace) : Rotational cosine of QR algorithm 779c Rsin (Kspace) : Rotational sine of QR algorithm 780c output: 781c rest (numnp) : preconditioned residual 782c 783c 784c Zdenek Johan, Winter 1991. (Fortran 90) 785c---------------------------------------------------------------------- 786c 787 use pointer_data 788 789 include "common.h" 790 include "mpif.h" 791 include "auxmpi.h" 792c 793 dimension y(nshg,ndof), ac(nshg,ndof), 794 & yold(nshg,ndof), acold(nshg,ndof), 795 & x(numnp,nsd), 796 & iBC(nshg), BC(nshg,ndofBC), 797 & rest(nshg), 798 & Diag(nshg), 799 & HBrg(Kspace+1,*), eBrg(*), 800 & yBrg(*), Rcos(*), 801 & Rsin(*), ilwork(nlwork), 802 & iper(nshg), EGmasst(numel,nshape,nshape) 803c 804 dimension Dyt(nshg), rmest(nshg), 805 & tempt(nshg), Dinv(nshg), 806 & uBrgt(nshg,Kspace+1) 807c 808 dimension shp(MAXTOP,maxsh,MAXQPT), 809 & shgl(MAXTOP,nsd,maxsh,MAXQPT), 810 & shpb(MAXTOP,maxsh,MAXQPT), 811 & shglb(MAXTOP,nsd,maxsh,MAXQPT) 812 real*8 elDw(numel) 813c 814c.... *******************>> Element Data Formation <<****************** 815c 816c 817c.... form the LHS matrices, the residual vector, and the block 818c diagonal preconditioner 819c 820 call ElmGMRSclr(y, ac, 821 & x, elDw, shp, shgl, 822 & iBC, BC, 823 & shpb, shglb, 824 & rest, 825 & rmest, Diag, iper, 826 & ilwork, EGmasst) 827 call rstat (rest, ilwork) 828 if(ntotGM.eq.0) resfrts=zero !don't let this mess up scaled dB 829 if(myrank.eq.master) then 830 write(*,*)'residual prior to sbd-preconditioning' 831 endif 832c 833c.... **********************>> EBE - GMRES <<******************** 834c 835 call timer ('Solver ') 836c 837c.... ------------------------> Initialization <----------------------- 838c 839c 840 id = isclr+5 841c.... initialize Dy 842c 843 Dyt = zero 844c 845c.... Right preconditioner 846c 847 call i3preSclr(Diag, Dinv, EGmassT, ilwork) 848c 849c Check the residual for divering trend 850c 851 852 call rstatCheckSclr(rest,ilwork,y,ac) 853 854c 855c.... copy rest in uBrgt(1) 856c 857 uBrgt(:,1) = rest 858c 859c.... calculate norm of residual 860c 861 tempt = rest**2 862 863 call sumgat (tempt, 1, summed, ilwork) 864 unorm = sqrt(summed) 865c 866c.... check if GMRES iterations are required 867c 868 iKss = 0 869 lGMRESt = 0 870c 871c.... if we are down to machine precision, don't bother solving 872c 873 if (unorm .lt. 100.*epsM**2) goto 3000 874c 875c.... set up tolerance of the Hessenberg's problem 876c 877 epsnrm = etol * unorm 878c 879c.... ------------------------> GMRES Loop <------------------------- 880c 881c.... loop through GMRES cycles 882c 883 do 2000 mGMRES = 1, nGMRES 884 lGMRESt = mGMRES - 1 885c 886 if (lGMRESt .gt. 0) then 887c 888c.... if GMRES restarts are necessary, calculate R - A x 889c 890c 891 892c.... perform the A x product 893c 894 call Au1GMRSclr (EGmasst, tempt, ilwork, iper) 895c 896c.... periodic nodes have to assemble results to their partners 897c 898c call bc3perSclr (iBC, tempt, iper) 899c 900c.... subtract A x from residual and calculate the norm 901c 902 tempt = rest - tempt 903 uBrgt(:,1) = tempt 904c 905c.... calculate the norm 906c 907 tempt = tempt**2 908 call sumgat (tempt, 1, summed, ilwork) 909 unorm = sqrt(summed) 910c 911c.... flop count 912c 913 ! flops = flops + ndof*numnp+numnp 914c 915 endif 916c 917c.... set up RHS of the Hessenberg's problem 918c 919 call clear (eBrg, Kspace+1) 920 call clear (HBrg, Kspace+1) 921 eBrg(1) = unorm 922c 923c.... normalize the first Krylov vector 924c 925 uBrgt(:,1) = uBrgt(:,1) / unorm 926c 927c.... loop through GMRES iterations 928c 929 do 1000 iK = 1, Kspace 930 iKss = iK 931 932 uBrgt(:,iKss+1) = uBrgt(:,iKss) 933 934c.... Au product ( u_{i+1} <-- EGmass u_{i+1} ) 935c 936 call Au1GMRSclr ( EGmasst, uBrgt(:,iKss+1), ilwork, iper ) 937 938c 939c.... periodic nodes have to assemble results to their partners 940c 941 call bc3perSclr (iBC, uBrgt(:,iKss+1), iper) 942c 943c.... orthogonalize and get the norm 944c 945 do jK = 1, iKss+1 946c 947 if (jK .eq. 1) then 948c 949 tempt = uBrgt(:,iKss+1) * uBrgt(:,1) ! {u_{i+1}*u_1} vector 950 call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_1) 951c 952 else 953c 954c project off jK-1 vector 955c 956 uBrgt(:,iKss+1) = uBrgt(:,iKss+1) - beta * uBrgt(:,jK-1) 957c 958 tempt = uBrgt(:,iKss+1) * uBrgt(:,jK) !{u_{i+1}*u_j} vector 959 call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_j) 960c 961 endif 962c 963 HBrg(jK,iKss) = beta ! put this in the Hessenberg Matrix 964c 965 enddo 966c 967 ! flops = flops + (3*iKss+1)*ndof*numnp+(iKss+1)*numnp 968c 969c the last inner product was with what was left of the vector (after 970c projecting off all of the previous vectors 971c 972 unorm = sqrt(beta) 973 HBrg(iKss+1,iKss) = unorm ! this fills the 1 sub diagonal band 974c 975c.... normalize the Krylov vector 976c 977 uBrgt(:,iKss+1) = uBrgt(:,iKss+1) / unorm ! normalize the next Krylov 978c vector 979c 980c.... construct and reduce the Hessenberg Matrix 981c since there is only one subdiagonal we can use a Givens rotation to 982c rotate off each subdiagonal AS IT IS FORMED. We do this because it 983c allows us to check progress of solution and quit when satisfied. Note 984c that all future K vects will put a subdiagonal in the next column so 985c there is no penalty to work ahead as the rotation for the next vector 986c will be unaffected by this rotation. 987 988c 989c H Y = E ========> R_i H Y = R_i E 990c 991 do jK = 1, iKss-1 992 tmp = Rcos(jK) * HBrg(jK, iKss) + 993 & Rsin(jK) * HBrg(jK+1,iKss) 994 HBrg(jK+1,iKss) = -Rsin(jK) * HBrg(jK, iKss) + 995 & Rcos(jK) * HBrg(jK+1,iKss) 996 HBrg(jK, iKss) = tmp 997 enddo 998c 999 tmp = sqrt(HBrg(iKss,iKss)**2 + HBrg(iKss+1,iKss)**2) 1000 Rcos(iKss) = HBrg(iKss, iKss) / tmp 1001 Rsin(iKss) = HBrg(iKss+1,iKss) / tmp 1002 HBrg(iKss, iKss) = tmp 1003 HBrg(iKss+1,iKss) = zero 1004c 1005c.... rotate eBrg R_i E 1006c 1007 tmp = Rcos(iKss)*eBrg(iKss) + Rsin(iKss)*eBrg(iKss+1) 1008 eBrg(iKss+1)=-Rsin(iKss)*eBrg(iKss) + Rcos(iKss)*eBrg(iKss+1) 1009 eBrg(iKss) = tmp 1010c 1011c.... check for convergence 1012c 1013 ercheck=eBrg(iKss+1) 1014 ntotGMs = ntotGMs + 1 1015 if (abs(eBrg(iKss+1)) .le. epsnrm) exit 1016c 1017c.... end of GMRES iteration loop 1018c 1019 1000 continue 1020c 1021c.... -------------------------> Solution <------------------------ 1022c 1023c.... if converged or end of Krylov space 1024c 1025c.... solve for yBrg 1026c 1027 do jK = iKss, 1, -1 1028 yBrg(jK) = eBrg(jK) / HBrg(jK,jK) 1029 do lK = 1, jK-1 1030 eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK) 1031 enddo 1032 enddo 1033c 1034c.... update Dy 1035c 1036 do jK = 1, iKss 1037 Dyt = Dyt + yBrg(jK) * uBrgt(:,jK) 1038 enddo 1039c 1040c.... flop count 1041c 1042 ! flops = flops + (3*iKss+1)*ndof*numnp 1043c 1044c.... check for convergence 1045c 1046 if (abs(eBrg(iKss+1)) .le. epsnrm) exit 1047c 1048c.... end of mGMRES loop 1049c 1050 2000 continue 1051c 1052c.... ------------------------> Converged <------------------------ 1053c 1054c.... if converged 1055c 1056 3000 continue 1057c 1058c.... back precondition the result 1059c 1060 Dyt(:) = Dyt(:) * Dinv(:) 1061c 1062c.... output the statistics 1063c 1064 call rstatSclr(rest, ilwork) 1065c.... stop the timer 1066c 1067 3002 continue ! no solve just res. 1068 call timer ('Back ') 1069c 1070c.... end 1071c 1072 return 1073 end 1074 1075 1076