xref: /phasta/phSolver/compressible/solgmr.f (revision 50a6f6340c649c1738186cf6fd8c42a882135a5f)
1        subroutine SolGMRe (y,         ac,        yold,      acold,
2     &			   x,         iBC,       BC,        EGmass,
3     &                     res,       BDiag,     HBrg,      eBrg,
4     &                     yBrg,      Rcos,      Rsin,      iper,
5     &                     ilwork,    shp,       shgl,      shpb,
6     &                     shglb,     Dy, rerr)
7c
8c----------------------------------------------------------------------
9c
10c  This is the preconditioned GMRES driver routine.
11c
12c input:
13c  y      (nshg,ndof)           : Y-variables at n+alpha_v
14c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
15c  yold   (nshg,ndof)           : Y-variables at beginning of step
16c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
17c  x      (numnp,nsd)            : node coordinates
18c  iBC    (nshg)                : BC codes
19c  BC     (nshg,ndofBC)         : BC constraint parameters
20c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
21c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
22c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
23c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
24c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
25c  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
26c  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
27c
28c output:
29c  res    (nshg,nflow)           : preconditioned residual
30c  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
31c
32c
33c Zdenek Johan,  Winter 1991.  (Fortran 90)
34c----------------------------------------------------------------------
35c
36        use pointer_data
37
38        include "common.h"
39        include "mpif.h"
40        include "auxmpi.h"
41c
42        dimension y(nshg,ndof),             ac(nshg,ndof),
43     &            yold(nshg,ndof),          acold(nshg,ndof),
44     &            x(numnp,nsd),
45     &            iBC(nshg),                BC(nshg,ndofBC),
46     &            res(nshg,nflow),
47     &            BDiag(nshg,nflow,nflow),
48     &            HBrg(Kspace+1,*),         eBrg(*),
49     &            yBrg(*),                  Rcos(*),
50     &            Rsin(*),                  ilwork(nlwork),
51     &            iper(nshg),               EGmass(numel,nedof,nedof)!,
52ctoomuchmem     &            Binv(numel,nedof,nedof)
53c
54        dimension Dy(nshg,nflow),            rmes(nshg,nflow),
55     &            temp(nshg,nflow),
56     &            uBrg(nshg,nflow,Kspace+1)
57c
58        dimension shp(MAXTOP,maxsh,MAXQPT),
59     &            shgl(MAXTOP,nsd,maxsh,MAXQPT),
60     &            shpb(MAXTOP,maxsh,MAXQPT),
61     &            shglb(MAXTOP,nsd,maxsh,MAXQPT)
62      real*8    rerr(nshg,10)
63c
64c.... *******************>> Element Data Formation <<******************
65c
66c
67c.... set the parameters for flux and surface tension calculations
68c
69c
70        idflx = 0
71        if(idiff >= 1)  idflx= idflx + (nflow-1) * nsd
72        if (isurf == 1) idflx=idflx + nsd
73c
74c.... form the LHS matrices, the residual vector, and the block
75c     diagonal preconditioner
76c
77        call ElmGMRe(y,             ac,            x,
78     &               shp,           shgl,          iBC,
79     &               BC,            shpb,
80     &               shglb,         res,
81     &               rmes,          BDiag,         iper,
82     &               ilwork,        EGmass,        rerr )
83c
84c.... **********************>>    EBE - GMRES    <<********************
85c
86        call timer ('Solver  ')
87c
88c.... ------------------------> Initialization <-----------------------
89c
90c
91c.... LU decompose the block diagonals
92c
93        if (iprec .ne. 0)
94     &  call i3LU (BDiag, res,  'LU_Fact ')
95
96c
97c.... block diagonal precondition residual
98c
99        call i3LU (BDiag, res,  'forward ')
100c
101c.... initialize Dy
102c
103        Dy = zero
104c
105c.... Pre-precondition the LHS mass matrix and set up the element
106c     by element preconditioners
107c
108ctoomuchmemory note that Binv is demoted from huge array to just one
109c        real*8 in i3pre because it takes too much memory
110
111        call i3pre (BDiag,    Binv,   EGmass,  ilwork)
112c
113c.... left EBE precondition the residual
114c
115ctoomuchmem        call i3Pcond (Binv,  res,  ilwork,   'L_Pcond ')
116c
117c.... copy res in uBrg(1)
118c
119        uBrg(:,:,1) = res
120c
121c.... calculate norm of residual
122c
123        temp  = res**2
124
125        call sumgat (temp, nflow, summed, ilwork)
126        unorm = sqrt(summed)
127c
128c.... check if GMRES iterations are required
129c
130        iKs    = 0
131        lGMRES = 0
132c
133c.... if we are down to machine precision, don't bother solving
134c
135        if (unorm .lt. 100.*epsM**2) goto 3000
136c
137c.... set up tolerance of the Hessenberg's problem
138c
139        epsnrm = etol * unorm
140c
141c.... ------------------------>  GMRES Loop  <-------------------------
142c
143c.... loop through GMRES cycles
144c
145        do 2000 mGMRES = 1, nGMRES
146        lGMRES = mGMRES - 1
147c
148        if (lGMRES .gt. 0) then
149c
150c.... if GMRES restarts are necessary, calculate  R - A x
151c
152c
153c.... right precondition Dy
154c
155           temp = Dy
156
157ctoomuchmem           call i3Pcond (Binv,  temp,  ilwork,  'R_Pcond ')
158c
159c.... perform the A x product
160c
161           call Au1GMR (EGmass,  temp,  ilwork, iBC,iper)
162c
163c.... periodic nodes have to assemble results to their partners
164c
165           call bc3per (iBC,  temp,  iper, ilwork, nflow)
166c
167c.... left preconditioning
168c
169ctoomuchmem           call i3Pcond (Binv,  temp,  ilwork,  'L_Pcond ')
170c
171c.... subtract A x from residual and calculate the norm
172c
173           temp = res - temp
174           uBrg(:,:,1) = temp
175c
176c.... calculate the norm
177c
178           temp  = temp**2
179           call sumgat (temp, nflow, summed, ilwork)
180           unorm = sqrt(summed)
181c
182c.... flop count
183c
184           flops = flops + nflow*nshg+nshg
185c
186        endif
187c
188c.... set up RHS of the Hessenberg's problem
189c
190        call clear (eBrg, Kspace+1)
191        eBrg(1) = unorm
192c
193c.... normalize the first Krylov vector
194c
195        uBrg(:,:,1) = uBrg(:,:,1) / unorm
196c
197c.... loop through GMRES iterations
198c
199        do 1000 iK = 1, Kspace
200           iKs = iK
201
202           uBrg(:,:,iKs+1) = uBrg(:,:,iKs)
203c
204c.... right EBE precondition the LHS ( u_{i+1} <-- inverse(U) u_i )
205c
206ctoomuchmem           call i3Pcond (Binv,  uBrg(:,:,iKs+1), ilwork,  'R_Pcond ')
207c
208c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
209c
210           call Au1GMR ( EGmass, uBrg(:,:,iKs+1),  ilwork, iBC,iper)
211c
212c.... periodic nodes have to assemble results to their partners
213c
214           call bc3per (iBC,  uBrg(:,:,iKs+1),  iper, ilwork, nflow)
215
216c
217c.... left EBE precondition the LHS ( u_{i+1} <-- inverse(L) u_{i+1} )
218c
219ctoomuchmem           call i3Pcond (Binv,  uBrg(:,:,iKs+1), ilwork, 'L_Pcond ')
220c
221c.... orthogonalize and get the norm
222c
223          do jK = 1, iKs+1
224c
225            if (jK .eq. 1) then
226c
227              temp = uBrg(:,:,iKs+1) * uBrg(:,:,1)  ! {u_{i+1}*u_1} vector
228              call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1)
229c
230            else
231c
232c project off jK-1 vector
233c
234              uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) - beta * uBrg(:,:,jK-1)
235c
236              temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector
237              call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j)
238c
239            endif
240c
241            HBrg(jK,iKs) = beta   ! put this in the Hessenberg Matrix
242c
243        enddo
244c
245        flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp
246c
247c  the last inner product was with what was left of the vector (after
248c  projecting off all of the previous vectors
249c
250        unorm           = sqrt(beta)
251        HBrg(iKs+1,iKs) = unorm   ! this fills the 1 sub diagonal band
252c
253c.... normalize the Krylov vector
254c
255        uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm  ! normalize the next Krylov
256c vector
257c
258c.... construct and reduce the Hessenberg Matrix
259c  since there is only one subdiagonal we can use a Givens rotation to
260c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
261c  allows us to check progress of solution and quit when satisfied.  Note
262c  that all future K vects will put a subdiagonal in the next column so
263c  there is no penalty to work ahead as  the rotation for the next vector
264c  will be unaffected by this rotation.
265
266c
267c     H Y = E ========>   R_i H Y = R_i E
268c
269           do jK = 1, iKs-1
270              tmp            =  Rcos(jK) * HBrg(jK,  iKs) +
271     &                          Rsin(jK) * HBrg(jK+1,iKs)
272              HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK,  iKs) +
273     &                          Rcos(jK) * HBrg(jK+1,iKs)
274              HBrg(jK,  iKs) =  tmp
275           enddo
276c
277           tmp             = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2)
278           Rcos(iKs)       = HBrg(iKs,  iKs) / tmp
279           Rsin(iKs)       = HBrg(iKs+1,iKs) / tmp
280           HBrg(iKs,  iKs) = tmp
281           HBrg(iKs+1,iKs) = zero
282c
283c.... rotate eBrg    R_i E
284c
285           tmp         = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1)
286           eBrg(iKs+1) =-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1)
287           eBrg(iKs)   = tmp
288c
289c.... check for convergence
290c
291           ntotGM = ntotGM + 1
292           echeck=abs(eBrg(iKs+1))
293           if (echeck .le. epsnrm) exit
294c
295c.... end of GMRES iteration loop
296c
297 1000   continue
298c
299c.... ------------------------->   Solution   <------------------------
300c
301c.... if converged or end of Krylov space
302c
303c.... solve for yBrg
304c
305        do jK = iKs, 1, -1
306           yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
307           do lK = 1, jK-1
308              eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
309           enddo
310        enddo
311c
312c.... update Dy
313c
314        do jK = 1, iKs
315           Dy = Dy + yBrg(jK) * uBrg(:,:,jK)
316        enddo
317c
318c.... flop count
319c
320        flops = flops + (3*iKs+1)*nflow*nshg
321c
322c.... check for convergence
323c
324
325        echeck=abs(eBrg(iKs+1))
326        if (echeck .le. epsnrm) exit
327        if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction',
328     &  (one-echeck/unorm)/(one-etol)*100
329c
330c.... end of mGMRES loop
331c
332 2000 continue
333c
334c.... ------------------------>   Converged   <------------------------
335c
336c.... if converged
337c
338 3000 continue
339c
340c.... back EBE precondition the results
341c
342ctoomuchmem      call i3Pcond (Binv,  Dy, ilwork, 'R_Pcond ')
343c
344c.... back block-diagonal precondition the results
345c
346      call i3LU (BDiag, Dy, 'backward')
347c
348c
349c.... output the statistics
350c
351              call rstat (res, ilwork)
352c
353c.... stop the timer
354c
355 3002 continue                  ! no solve just res.
356      call timer ('Back    ')
357c
358c.... end
359c
360      return
361      end
362
363
364
365
366
367      subroutine SolGMRs(y,         ac,        yold,      acold,
368     &			 x,         iBC,       BC,
369     &                   col,       row,       lhsk,
370     &                   res,       BDiag,     HBrg,      eBrg,
371     &                   yBrg,      Rcos,      Rsin,      iper,
372     &                   ilwork,    shp,       shgl,      shpb,
373     &                   shglb,     Dy,        rerr)
374c
375c----------------------------------------------------------------------
376c
377c  This is the preconditioned GMRES driver routine.
378c
379c input:
380c  y      (nshg,ndof)           : Y-variables at n+alpha_v
381c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
382c  yold   (nshg,ndof)           : Y-variables at beginning of step
383c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
384c  x      (numnp,nsd)            : node coordinates
385c  iBC    (nshg)                : BC codes
386c  BC     (nshg,ndofBC)         : BC constraint parameters
387c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
388c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
389c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
390c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
391c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
392c  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
393c  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
394c
395c output:
396c  res    (nshg,nflow)           : preconditioned residual
397c  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
398c
399c
400c Zdenek Johan,  Winter 1991.  (Fortran 90)
401c----------------------------------------------------------------------
402c
403      use pointer_data
404
405      include "common.h"
406      include "mpif.h"
407      include "auxmpi.h"
408c
409      integer col(nshg+1), row(nnz*nshg)
410      real*8 lhsK(nflow*nflow,nnz_tot)
411
412
413      dimension y(nshg,ndof),             ac(nshg,ndof),
414     &          yold(nshg,ndof),          acold(nshg,ndof),
415     &          x(numnp,nsd),
416     &          iBC(nshg),                BC(nshg,ndofBC),
417     &          res(nshg,nflow),
418     &          BDiag(nshg,nflow,nflow),
419     &          HBrg(Kspace+1,Kspace),    eBrg(Kspace+1),
420     &          yBrg(Kspace),             Rcos(Kspace),
421     &          Rsin(Kspace),             ilwork(nlwork),
422     &          iper(nshg)
423c
424      dimension Dy(nshg,nflow),            rmes(nshg,nflow),
425     &          temp(nshg,nflow),
426     &          uBrg(nshg,nflow,Kspace+1)
427c
428      dimension shp(MAXTOP,maxsh,MAXQPT),
429     &          shgl(MAXTOP,nsd,maxsh,MAXQPT),
430     &          shpb(MAXTOP,maxsh,MAXQPT),
431     &          shglb(MAXTOP,nsd,maxsh,MAXQPT)
432      real*8    rerr(nshg,10)
433c
434c
435c.... *******************>> Element Data Formation <<******************
436c
437c
438c.... set the parameters for flux and surface tension calculations
439c
440c
441      idflx = 0
442      if(idiff >= 1)  idflx= idflx + (nflow-1) * nsd
443      if (isurf == 1) idflx=idflx + nsd
444c
445c.... form the LHS matrices, the residual vector, and the block
446c     diagonal preconditioner
447c
448      call ElmGMRs(y,             ac,            x,
449     &             shp,           shgl,          iBC,
450     &             BC,            shpb,
451     &             shglb,         res,
452     &             rmes,          BDiag,         iper,
453     &             ilwork,        lhsK,          col,
454     &             row,           rerr )
455
456	call tnanq(res,5, 'res_egmr')
457	call tnanq(BDiag,25, 'bdg_egmr')
458c
459c.... **********************>>    EBE - GMRES    <<********************
460c
461      call timer ('Solver  ')
462c
463c.... ------------------------> Initialization <-----------------------
464c
465c
466c.... LU decompose the block diagonals
467c
468      if (iprec .ne. 0) then
469         call i3LU (BDiag, res,  'LU_Fact ')
470         if (numpe > 1) then
471            call commu (BDiag  , ilwork, nflow*nflow  , 'out')
472         endif
473      endif
474c
475c.... block diagonal precondition residual
476c
477      call i3LU (BDiag, res,  'forward ')
478c
479c Check the residual for divering trend
480c
481	call rstatCheck(res,ilwork,y,ac)
482c
483c.... initialize Dy
484c
485      Dy = zero
486c
487c.... Pre-precondition the LHS mass matrix and set up the sparse
488c     preconditioners
489c
490
491      if(lhs.eq.1) call Spsi3pre (BDiag,    lhsK,  col, row)
492c
493c.... copy res in uBrg(1)
494c
495      uBrg(:,:,1) = res
496c
497c.... calculate norm of residual
498c
499      temp  = res**2
500
501      call sumgat (temp, nflow, summed, ilwork)
502      unorm = sqrt(summed)
503c
504c.... check if GMRES iterations are required
505c
506      iKs    = 0
507      lGMRESs = 0
508c
509c.... if we are down to machine precision, don't bother solving
510c
511      if (unorm .lt. 100.*epsM**2) goto 3000
512c
513c.... set up tolerance of the Hessenberg's problem
514c
515      epsnrm = etol * unorm
516c
517c.... ------------------------>  GMRES Loop  <-------------------------
518c
519c.... loop through GMRES cycles
520c
521      do 2000 mGMRES = 1, nGMRES
522         lGMRESs = mGMRES - 1
523c
524         if (lGMRES .gt. 0) then
525c
526c.... if GMRES restarts are necessary, calculate  R - A x
527c
528c
529c.... right precondition Dy
530c
531            temp = Dy
532
533c
534c.... perform the A x product
535c
536            call SparseAp (iper,ilwork,iBC, col, row, lhsK,  temp)
537c           call tnanq(temp,5, 'q_spAPrs')
538
539c
540c.... periodic nodes have to assemble results to their partners
541c
542            call bc3per (iBC,  temp,  iper, ilwork, nflow)
543c           call tnanq(temp,5, 'q_BCprs')
544c
545c.... subtract A x from residual and calculate the norm
546c
547            temp = res - temp
548            uBrg(:,:,1) = temp
549c
550c.... calculate the norm
551c
552            temp  = temp**2
553            call sumgat (temp, nflow, summed, ilwork)
554            unorm = sqrt(summed)
555c
556c.... flop count
557c
558            flops = flops + nflow*nshg+nshg
559c
560         endif
561c
562c.... set up RHS of the Hessenberg's problem
563c
564         call clear (eBrg, Kspace+1)
565         eBrg(1) = unorm
566c
567c.... normalize the first Krylov vector
568c
569         uBrg(:,:,1) = uBrg(:,:,1) / unorm
570c
571c.... loop through GMRES iterations
572c
573         do 1000 iK = 1, Kspace
574            iKs = iK
575
576            uBrg(:,:,iKs+1) = uBrg(:,:,iKs)
577c
578c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
579c
580            call SparseAp (iper, ilwork, iBC,
581     &                     col,  row,    lhsK,
582     &                     uBrg(:,:,iKs+1) )
583c           call tnanq(uBrg(:,:,iKS+1),5, 'q_spAP')
584
585c
586c.... periodic nodes have to assemble results to their partners
587c
588            call bc3per (iBC,  uBrg(:,:,iKs+1),  iper, ilwork, nflow)
589c           call tnanq(uBrg(:,:,iKS+1),5, 'q_bc')
590
591c
592c.... orthogonalize and get the norm
593c
594            do jK = 1, iKs+1
595c
596               if (jK .eq. 1) then
597c
598                  temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector
599                  call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1)
600c
601               else
602c
603c project off jK-1 vector
604c
605                  uBrg(:,:,iKs+1)=uBrg(:,:,iKs+1)-beta * uBrg(:,:,jK-1)
606c
607                  temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector
608                  call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j)
609c
610               endif
611c
612               HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix
613c
614            enddo
615c
616            flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp
617c
618c  the last inner product was with what was left of the vector (after
619c  projecting off all of the previous vectors
620c
621        if(beta.le.0) write(*,*) 'beta in solgmr non-positive'
622            unorm           = sqrt(beta)
623            HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band
624c
625c.... normalize the Krylov vector
626c
627            uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov
628c vector
629c
630c.... construct and reduce the Hessenberg Matrix
631c  since there is only one subdiagonal we can use a Givens rotation to
632c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
633c  allows us to check progress of solution and quit when satisfied.  Note
634c  that all future K vects will put a subdiagonal in the next column so
635c  there is no penalty to work ahead as  the rotation for the next vector
636c  will be unaffected by this rotation.
637
638c
639c     H Y = E ========>   R_i H Y = R_i E
640c
641            do jK = 1, iKs-1
642               tmp            =  Rcos(jK) * HBrg(jK,  iKs) +
643     &                           Rsin(jK) * HBrg(jK+1,iKs)
644               HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK,  iKs) +
645     &                           Rcos(jK) * HBrg(jK+1,iKs)
646               HBrg(jK,  iKs) =  tmp
647            enddo
648c
649            tmp            = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2)
650            Rcos(iKs)      = HBrg(iKs,  iKs) / tmp
651            Rsin(iKs)      = HBrg(iKs+1,iKs) / tmp
652            HBrg(iKs,  iKs)= tmp
653            HBrg(iKs+1,iKs)= zero
654c
655c.... rotate eBrg    R_i E
656c
657            tmp        = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1)
658            eBrg(iKs+1)=-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1)
659            eBrg(iKs)  = tmp
660c
661c.... check for convergence
662c
663            ntotGM = ntotGM + 1
664            echeck=abs(eBrg(iKs+1))
665            if (echeck .le. epsnrm.and. iKs .ge. minIters) exit
666c
667c.... end of GMRES iteration loop
668c
669 1000    continue
670c
671c.... ------------------------->   Solution   <------------------------
672c
673c.... if converged or end of Krylov space
674c
675c.... solve for yBrg
676c
677         do jK = iKs, 1, -1
678            yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
679            do lK = 1, jK-1
680               eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
681            enddo
682         enddo
683c
684c.... update Dy
685c
686         do jK = 1, iKs
687            Dy = Dy + yBrg(jK) * uBrg(:,:,jK)
688         enddo
689c
690c.... flop count
691c
692         flops = flops + (3*iKs+1)*nflow*nshg
693c
694c.... check for convergence
695c
696        echeck=abs(eBrg(iKs+1))
697        if (echeck .le. epsnrm) exit
698!        if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction',
699!     &  (one-echeck*etol/epsnrm)/(one-etol)*100
700
701c
702c.... end of mGMRES loop
703c
704 2000 continue
705c
706c.... ------------------------>   Converged   <------------------------
707c
708c.... if converged
709c
710 3000 continue
711
712c
713c
714c.... back block-diagonal precondition the results
715c
716      call i3LU (BDiag, Dy, 'backward')
717c
718c
719c.... output the statistics
720c
721      call rstat (res, ilwork)
722
723      if(myrank.eq.master) then
724        if (echeck .le. epsnrm) then
725            write(*,*)
726        else
727            write(*,*)'solver tolerance %satisfaction',
728     &  (one-echeck*etol/epsnrm)/(one-etol)*100
729        endif
730      endif
731c
732c.... stop the timer
733c
734 3002 continue                  ! no solve just res.
735      call timer ('Back    ')
736c
737c.... end
738c
739      return
740      end
741
742        subroutine SolGMRSclr(y,       ac,      yold,
743     &                        acold,   EGmasst,
744     &                        x,       elDw,
745     &                        iBC,      BC,
746     &                        rest,     HBrg,     eBrg,
747     &                        yBrg,     Rcos,     Rsin,    iper,
748     &                        ilwork,
749     &                        shp,      shgl,
750     &                        shpb,     shglb,  Dyt)
751c
752c----------------------------------------------------------------------
753c
754c  This is the preconditioned GMRES driver routine.
755c
756c input:
757c  y      (nshg,ndof)           : Y-variables at n+alpha_f
758c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
759c  yold   (nshg,ndof)           : Y-variables at beginning of step
760c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
761c  x      (numnp,nsd)            : node coordinates
762c  iBC    (nshg)                : BC codes
763c  BC     (nshg,ndofBC)         : BC constraint parameters
764c  iper   (nshg)                : periodic nodal information
765c
766c output:
767c  y      (nshg,ndof)           : Y-variables at n+alpha_f
768c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
769c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
770c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
771c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
772c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
773c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
774c output:
775c  rest   (numnp)           : preconditioned residual
776c
777c
778c Zdenek Johan,  Winter 1991.  (Fortran 90)
779c----------------------------------------------------------------------
780c
781        use pointer_data
782
783        include "common.h"
784        include "mpif.h"
785        include "auxmpi.h"
786c
787        dimension y(nshg,ndof),      ac(nshg,ndof),
788     &            yold(nshg,ndof),   acold(nshg,ndof),
789     &            x(numnp,nsd),
790     &            iBC(nshg),         BC(nshg,ndofBC),
791     &            rest(nshg),
792     &            Diag(nshg),
793     &            HBrg(Kspace+1,*),  eBrg(*),
794     &            yBrg(*),           Rcos(*),
795     &            Rsin(*),           ilwork(nlwork),
796     &            iper(nshg),        EGmasst(numel,nshape,nshape)
797c
798        dimension Dyt(nshg),         rmest(nshg),
799     &            tempt(nshg),       Dinv(nshg),
800     &            uBrgt(nshg,Kspace+1)
801c
802        dimension shp(MAXTOP,maxsh,MAXQPT),
803     &            shgl(MAXTOP,nsd,maxsh,MAXQPT),
804     &            shpb(MAXTOP,maxsh,MAXQPT),
805     &            shglb(MAXTOP,nsd,maxsh,MAXQPT)
806        real*8    elDw(numel)
807c
808c.... *******************>> Element Data Formation <<******************
809c
810c
811c.... form the LHS matrices, the residual vector, and the block
812c     diagonal preconditioner
813c
814        call ElmGMRSclr(y,             ac,
815     &                  x,             elDw,      shp,        shgl,
816     &                  iBC,           BC,
817     &                  shpb,          shglb,
818     &                  rest,
819     &                  rmest,         Diag,       iper,
820     &                  ilwork,        EGmasst)
821c
822c.... **********************>>    EBE - GMRES    <<********************
823c
824        call timer ('Solver  ')
825c
826c.... ------------------------> Initialization <-----------------------
827c
828c
829      id = isclr+5
830c.... initialize Dy
831c
832        Dyt = zero
833c
834c.... Right preconditioner
835c
836        call i3preSclr(Diag, Dinv, EGmassT, ilwork)
837c
838c Check the residual for divering trend
839c
840
841        call rstatCheckSclr(rest,ilwork,y,ac)
842
843c
844c.... copy rest in uBrgt(1)
845c
846        uBrgt(:,1) = rest
847c
848c.... calculate norm of residual
849c
850        tempt  = rest**2
851
852        call sumgat (tempt, 1, summed, ilwork)
853        unorm = sqrt(summed)
854c
855c.... check if GMRES iterations are required
856c
857        iKss    = 0
858        lGMRESt = 0
859c
860c.... if we are down to machine precision, don't bother solving
861c
862        if (unorm .lt. 100.*epsM**2) goto 3000
863c
864c.... set up tolerance of the Hessenberg's problem
865c
866        epsnrm = etol * unorm
867c
868c.... ------------------------>  GMRES Loop  <-------------------------
869c
870c.... loop through GMRES cycles
871c
872        do 2000 mGMRES = 1, nGMRES
873        lGMRESt = mGMRES - 1
874c
875        if (lGMRESt .gt. 0) then
876c
877c.... if GMRES restarts are necessary, calculate  R - A x
878c
879c
880
881c.... perform the A x product
882c
883           call Au1GMRSclr (EGmasst,  tempt,  ilwork, iper)
884c
885c.... periodic nodes have to assemble results to their partners
886c
887c          call bc3perSclr (iBC,  tempt,  iper)
888c
889c.... subtract A x from residual and calculate the norm
890c
891           tempt = rest - tempt
892           uBrgt(:,1) = tempt
893c
894c.... calculate the norm
895c
896           tempt  = tempt**2
897           call sumgat (tempt, 1, summed, ilwork)
898           unorm = sqrt(summed)
899c
900c.... flop count
901c
902           flops = flops + ndof*numnp+numnp
903c
904        endif
905c
906c.... set up RHS of the Hessenberg's problem
907c
908        call clear (eBrg, Kspace+1)
909        call clear (HBrg, Kspace+1)
910        eBrg(1) = unorm
911c
912c.... normalize the first Krylov vector
913c
914        uBrgt(:,1) = uBrgt(:,1) / unorm
915c
916c.... loop through GMRES iterations
917c
918        do 1000 iK = 1, Kspace
919           iKss = iK
920
921           uBrgt(:,iKss+1) = uBrgt(:,iKss)
922
923c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
924c
925           call Au1GMRSclr ( EGmasst, uBrgt(:,iKss+1),  ilwork, iper )
926
927c
928c.... periodic nodes have to assemble results to their partners
929c
930           call bc3perSclr (iBC,  uBrgt(:,iKss+1),  iper)
931c
932c.... orthogonalize and get the norm
933c
934          do jK = 1, iKss+1
935c
936            if (jK .eq. 1) then
937c
938              tempt = uBrgt(:,iKss+1) * uBrgt(:,1)  ! {u_{i+1}*u_1} vector
939              call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_1)
940c
941            else
942c
943c project off jK-1 vector
944c
945          uBrgt(:,iKss+1) = uBrgt(:,iKss+1) - beta * uBrgt(:,jK-1)
946c
947              tempt = uBrgt(:,iKss+1) * uBrgt(:,jK) !{u_{i+1}*u_j} vector
948              call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_j)
949c
950            endif
951c
952            HBrg(jK,iKss) = beta   ! put this in the Hessenberg Matrix
953c
954        enddo
955c
956        flops = flops + (3*iKss+1)*ndof*numnp+(iKss+1)*numnp
957c
958c  the last inner product was with what was left of the vector (after
959c  projecting off all of the previous vectors
960c
961        unorm           = sqrt(beta)
962        HBrg(iKss+1,iKss) = unorm   ! this fills the 1 sub diagonal band
963c
964c.... normalize the Krylov vector
965c
966        uBrgt(:,iKss+1) = uBrgt(:,iKss+1) / unorm  ! normalize the next Krylov
967c vector
968c
969c.... construct and reduce the Hessenberg Matrix
970c  since there is only one subdiagonal we can use a Givens rotation to
971c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
972c  allows us to check progress of solution and quit when satisfied.  Note
973c  that all future K vects will put a subdiagonal in the next column so
974c  there is no penalty to work ahead as  the rotation for the next vector
975c  will be unaffected by this rotation.
976
977c
978c     H Y = E ========>   R_i H Y = R_i E
979c
980           do jK = 1, iKss-1
981              tmp            =  Rcos(jK) * HBrg(jK,  iKss) +
982     &                          Rsin(jK) * HBrg(jK+1,iKss)
983              HBrg(jK+1,iKss) = -Rsin(jK) * HBrg(jK,  iKss) +
984     &                          Rcos(jK) * HBrg(jK+1,iKss)
985              HBrg(jK,  iKss) =  tmp
986           enddo
987c
988           tmp        = sqrt(HBrg(iKss,iKss)**2 + HBrg(iKss+1,iKss)**2)
989           Rcos(iKss) = HBrg(iKss,  iKss) / tmp
990           Rsin(iKss) = HBrg(iKss+1,iKss) / tmp
991           HBrg(iKss,  iKss) = tmp
992           HBrg(iKss+1,iKss) = zero
993c
994c.... rotate eBrg    R_i E
995c
996           tmp         = Rcos(iKss)*eBrg(iKss) + Rsin(iKss)*eBrg(iKss+1)
997           eBrg(iKss+1)=-Rsin(iKss)*eBrg(iKss) + Rcos(iKss)*eBrg(iKss+1)
998           eBrg(iKss)  = tmp
999c
1000c.... check for convergence
1001c
1002           ercheck=eBrg(iKss+1)
1003           ntotGMs = ntotGMs + 1
1004           if (abs(eBrg(iKss+1)) .le. epsnrm) exit
1005c
1006c.... end of GMRES iteration loop
1007c
1008 1000   continue
1009c
1010c.... ------------------------->   Solution   <------------------------
1011c
1012c.... if converged or end of Krylov space
1013c
1014c.... solve for yBrg
1015c
1016        do jK = iKss, 1, -1
1017           yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
1018           do lK = 1, jK-1
1019              eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
1020           enddo
1021        enddo
1022c
1023c.... update Dy
1024c
1025        do jK = 1, iKss
1026           Dyt = Dyt + yBrg(jK) * uBrgt(:,jK)
1027        enddo
1028c
1029c.... flop count
1030c
1031        flops = flops + (3*iKss+1)*ndof*numnp
1032c
1033c.... check for convergence
1034c
1035        if (abs(eBrg(iKss+1)) .le. epsnrm) exit
1036c
1037c.... end of mGMRES loop
1038c
1039 2000 continue
1040c
1041c.... ------------------------>   Converged   <------------------------
1042c
1043c.... if converged
1044c
1045 3000 continue
1046c
1047c.... back precondition the result
1048c
1049      Dyt(:) = Dyt(:) * Dinv(:)
1050c
1051c.... output the statistics
1052c
1053      call rstatSclr(rest, ilwork)
1054c.... stop the timer
1055c
1056 3002 continue                  ! no solve just res.
1057      call timer ('Back    ')
1058c
1059c.... end
1060c
1061      return
1062      end
1063
1064
1065