xref: /phasta/phSolver/compressible/solgmr.f (revision 0be30ed5f961ad1a1373ad7d2e756d471b0c0231)
1        subroutine SolGMRe (y,         ac,        yold,      acold,
2     &			   x,         iBC,       BC,        EGmass,
3     &                     res,       BDiag,     HBrg,      eBrg,
4     &                     yBrg,      Rcos,      Rsin,      iper,
5     &                     ilwork,    shp,       shgl,      shpb,
6     &                     shglb,     Dy, rerr)
7c
8c----------------------------------------------------------------------
9c
10c  This is the preconditioned GMRES driver routine.
11c
12c input:
13c  y      (nshg,ndof)           : Y-variables at n+alpha_v
14c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
15c  yold   (nshg,ndof)           : Y-variables at beginning of step
16c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
17c  x      (numnp,nsd)            : node coordinates
18c  iBC    (nshg)                : BC codes
19c  BC     (nshg,ndofBC)         : BC constraint parameters
20c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
21c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
22c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
23c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
24c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
25c  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
26c  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
27c
28c output:
29c  res    (nshg,nflow)           : preconditioned residual
30c  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
31c
32c
33c Zdenek Johan,  Winter 1991.  (Fortran 90)
34c----------------------------------------------------------------------
35c
36        use pointer_data
37
38        include "common.h"
39        include "mpif.h"
40        include "auxmpi.h"
41c
42        dimension y(nshg,ndof),             ac(nshg,ndof),
43     &            yold(nshg,ndof),          acold(nshg,ndof),
44     &            x(numnp,nsd),
45     &            iBC(nshg),                BC(nshg,ndofBC),
46     &            res(nshg,nflow),
47     &            BDiag(nshg,nflow,nflow),
48     &            HBrg(Kspace+1,*),         eBrg(*),
49     &            yBrg(*),                  Rcos(*),
50     &            Rsin(*),                  ilwork(nlwork),
51     &            iper(nshg),               EGmass(numel,nedof,nedof)!,
52ctoomuchmem     &            Binv(numel,nedof,nedof)
53c
54        dimension Dy(nshg,nflow),            rmes(nshg,nflow),
55     &            temp(nshg,nflow),
56     &            uBrg(nshg,nflow,Kspace+1)
57c
58        dimension shp(MAXTOP,maxsh,MAXQPT),
59     &            shgl(MAXTOP,nsd,maxsh,MAXQPT),
60     &            shpb(MAXTOP,maxsh,MAXQPT),
61     &            shglb(MAXTOP,nsd,maxsh,MAXQPT)
62      real*8    rerr(nshg,10)
63c
64c.... *******************>> Element Data Formation <<******************
65c
66c
67c.... set the parameters for flux and surface tension calculations
68c
69c
70        idflx = 0
71        if(idiff >= 1)  idflx= idflx + (nflow-1) * nsd
72        if (isurf == 1) idflx=idflx + nsd
73c
74c.... form the LHS matrices, the residual vector, and the block
75c     diagonal preconditioner
76c
77        call ElmGMRe(y,             ac,            x,
78     &               shp,           shgl,          iBC,
79     &               BC,            shpb,
80     &               shglb,         res,
81     &               rmes,          BDiag,         iper,
82     &               ilwork,        EGmass,        rerr )
83c
84c.... **********************>>    EBE - GMRES    <<********************
85c
86        call timer ('Solver  ')
87c
88c.... ------------------------> Initialization <-----------------------
89c
90c
91c.... LU decompose the block diagonals
92c
93        if (iprec .ne. 0)
94     &  call i3LU (BDiag, res,  'LU_Fact ')
95
96c
97c.... block diagonal precondition residual
98c
99        call i3LU (BDiag, res,  'forward ')
100c
101c.... initialize Dy
102c
103        Dy = zero
104c
105c.... Pre-precondition the LHS mass matrix and set up the element
106c     by element preconditioners
107c
108ctoomuchmemory note that Binv is demoted from huge array to just one
109c        real*8 in i3pre because it takes too much memory
110
111        call i3pre (BDiag,    Binv,   EGmass,  ilwork)
112c
113c.... left EBE precondition the residual
114c
115ctoomuchmem        call i3Pcond (Binv,  res,  ilwork,   'L_Pcond ')
116c
117c.... copy res in uBrg(1)
118c
119        uBrg(:,:,1) = res
120c
121c.... calculate norm of residual
122c
123        temp  = res**2
124
125        call sumgat (temp, nflow, summed, ilwork)
126        unorm = sqrt(summed)
127c
128c.... check if GMRES iterations are required
129c
130        iKs    = 0
131        lGMRES = 0
132c
133c.... if we are down to machine precision, don't bother solving
134c
135        if (unorm .lt. 100.*epsM**2) goto 3000
136c
137c.... set up tolerance of the Hessenberg's problem
138c
139        epsnrm = etol * unorm
140c
141c.... ------------------------>  GMRES Loop  <-------------------------
142c
143c.... loop through GMRES cycles
144c
145        do 2000 mGMRES = 1, nGMRES
146        lGMRES = mGMRES - 1
147c
148        if (lGMRES .gt. 0) then
149c
150c.... if GMRES restarts are necessary, calculate  R - A x
151c
152c
153c.... right precondition Dy
154c
155           temp = Dy
156
157ctoomuchmem           call i3Pcond (Binv,  temp,  ilwork,  'R_Pcond ')
158c
159c.... perform the A x product
160c
161           call Au1GMR (EGmass,  temp,  ilwork, iBC,iper)
162c
163c.... periodic nodes have to assemble results to their partners
164c
165           call bc3per (iBC,  temp,  iper, ilwork, nflow)
166c
167c.... left preconditioning
168c
169ctoomuchmem           call i3Pcond (Binv,  temp,  ilwork,  'L_Pcond ')
170c
171c.... subtract A x from residual and calculate the norm
172c
173           temp = res - temp
174           uBrg(:,:,1) = temp
175c
176c.... calculate the norm
177c
178           temp  = temp**2
179           call sumgat (temp, nflow, summed, ilwork)
180           unorm = sqrt(summed)
181c
182c.... flop count
183c
184      !      flops = flops + nflow*nshg+nshg
185c
186        endif
187c
188c.... set up RHS of the Hessenberg's problem
189c
190        call clear (eBrg, Kspace+1)
191        eBrg(1) = unorm
192c
193c.... normalize the first Krylov vector
194c
195        uBrg(:,:,1) = uBrg(:,:,1) / unorm
196c
197c.... loop through GMRES iterations
198c
199        do 1000 iK = 1, Kspace
200           iKs = iK
201
202           uBrg(:,:,iKs+1) = uBrg(:,:,iKs)
203c
204c.... right EBE precondition the LHS ( u_{i+1} <-- inverse(U) u_i )
205c
206ctoomuchmem           call i3Pcond (Binv,  uBrg(:,:,iKs+1), ilwork,  'R_Pcond ')
207c
208c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
209c
210           call Au1GMR ( EGmass, uBrg(:,:,iKs+1),  ilwork, iBC,iper)
211c
212c.... periodic nodes have to assemble results to their partners
213c
214           call bc3per (iBC,  uBrg(:,:,iKs+1),  iper, ilwork, nflow)
215
216c
217c.... left EBE precondition the LHS ( u_{i+1} <-- inverse(L) u_{i+1} )
218c
219ctoomuchmem           call i3Pcond (Binv,  uBrg(:,:,iKs+1), ilwork, 'L_Pcond ')
220c
221c.... orthogonalize and get the norm
222c
223          do jK = 1, iKs+1
224c
225            if (jK .eq. 1) then
226c
227              temp = uBrg(:,:,iKs+1) * uBrg(:,:,1)  ! {u_{i+1}*u_1} vector
228              call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1)
229c
230            else
231c
232c project off jK-1 vector
233c
234              uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) - beta * uBrg(:,:,jK-1)
235c
236              temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector
237              call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j)
238c
239            endif
240c
241            HBrg(jK,iKs) = beta   ! put this in the Hessenberg Matrix
242c
243        enddo
244c
245   !      flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp
246c
247c  the last inner product was with what was left of the vector (after
248c  projecting off all of the previous vectors
249c
250        unorm           = sqrt(beta)
251        HBrg(iKs+1,iKs) = unorm   ! this fills the 1 sub diagonal band
252c
253c.... normalize the Krylov vector
254c
255        uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm  ! normalize the next Krylov
256c vector
257c
258c.... construct and reduce the Hessenberg Matrix
259c  since there is only one subdiagonal we can use a Givens rotation to
260c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
261c  allows us to check progress of solution and quit when satisfied.  Note
262c  that all future K vects will put a subdiagonal in the next column so
263c  there is no penalty to work ahead as  the rotation for the next vector
264c  will be unaffected by this rotation.
265
266c
267c     H Y = E ========>   R_i H Y = R_i E
268c
269           do jK = 1, iKs-1
270              tmp            =  Rcos(jK) * HBrg(jK,  iKs) +
271     &                          Rsin(jK) * HBrg(jK+1,iKs)
272              HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK,  iKs) +
273     &                          Rcos(jK) * HBrg(jK+1,iKs)
274              HBrg(jK,  iKs) =  tmp
275           enddo
276c
277           tmp             = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2)
278           Rcos(iKs)       = HBrg(iKs,  iKs) / tmp
279           Rsin(iKs)       = HBrg(iKs+1,iKs) / tmp
280           HBrg(iKs,  iKs) = tmp
281           HBrg(iKs+1,iKs) = zero
282c
283c.... rotate eBrg    R_i E
284c
285           tmp         = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1)
286           eBrg(iKs+1) =-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1)
287           eBrg(iKs)   = tmp
288c
289c.... check for convergence
290c
291           ntotGM = ntotGM + 1
292           echeck=abs(eBrg(iKs+1))
293           if (echeck .le. epsnrm) exit
294c
295c.... end of GMRES iteration loop
296c
297 1000   continue
298c
299c.... ------------------------->   Solution   <------------------------
300c
301c.... if converged or end of Krylov space
302c
303c.... solve for yBrg
304c
305        do jK = iKs, 1, -1
306           yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
307           do lK = 1, jK-1
308              eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
309           enddo
310        enddo
311c
312c.... update Dy
313c
314        do jK = 1, iKs
315           Dy = Dy + yBrg(jK) * uBrg(:,:,jK)
316        enddo
317c
318c.... flop count
319c
320   !      flops = flops + (3*iKs+1)*nflow*nshg
321c
322c.... check for convergence
323c
324
325        echeck=abs(eBrg(iKs+1))
326        if (echeck .le. epsnrm) exit
327        if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction',
328     &  (one-echeck/unorm)/(one-etol)*100
329c
330c.... end of mGMRES loop
331c
332 2000 continue
333c
334c.... ------------------------>   Converged   <------------------------
335c
336c.... if converged
337c
338 3000 continue
339c
340c.... back EBE precondition the results
341c
342ctoomuchmem      call i3Pcond (Binv,  Dy, ilwork, 'R_Pcond ')
343c
344c.... back block-diagonal precondition the results
345c
346      call i3LU (BDiag, Dy, 'backward')
347c
348c
349c.... output the statistics
350c
351              call rstat (res, ilwork)
352c
353c.... stop the timer
354c
355 3002 continue                  ! no solve just res.
356      call timer ('Back    ')
357c
358c.... end
359c
360      return
361      end
362
363
364
365
366
367      subroutine SolGMRs(y,         ac,        yold,      acold,
368     &			 x,         iBC,       BC,
369     &                   col,       row,       lhsk,
370     &                   res,       BDiag,     HBrg,      eBrg,
371     &                   yBrg,      Rcos,      Rsin,      iper,
372     &                   ilwork,    shp,       shgl,      shpb,
373     &                   shglb,     Dy,        rerr)
374c
375c----------------------------------------------------------------------
376c
377c  This is the preconditioned GMRES driver routine.
378c
379c input:
380c  y      (nshg,ndof)           : Y-variables at n+alpha_v
381c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
382c  yold   (nshg,ndof)           : Y-variables at beginning of step
383c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
384c  x      (numnp,nsd)            : node coordinates
385c  iBC    (nshg)                : BC codes
386c  BC     (nshg,ndofBC)         : BC constraint parameters
387c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
388c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
389c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
390c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
391c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
392c  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
393c  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
394c
395c output:
396c  res    (nshg,nflow)           : preconditioned residual
397c  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
398c
399c
400c Zdenek Johan,  Winter 1991.  (Fortran 90)
401c----------------------------------------------------------------------
402c
403      use pointer_data
404
405      include "common.h"
406      include "mpif.h"
407      include "auxmpi.h"
408c
409      integer col(nshg+1), row(nnz*nshg)
410      real*8 lhsK(nflow*nflow,nnz_tot)
411
412
413      dimension y(nshg,ndof),             ac(nshg,ndof),
414     &          yold(nshg,ndof),          acold(nshg,ndof),
415     &          x(numnp,nsd),
416     &          iBC(nshg),                BC(nshg,ndofBC),
417     &          res(nshg,nflow),
418     &          BDiag(nshg,nflow,nflow),
419     &          HBrg(Kspace+1,Kspace),    eBrg(Kspace+1),
420     &          yBrg(Kspace),             Rcos(Kspace),
421     &          Rsin(Kspace),             ilwork(nlwork),
422     &          iper(nshg)
423c
424      dimension Dy(nshg,nflow),            rmes(nshg,nflow),
425     &          temp(nshg,nflow),
426     &          uBrg(nshg,nflow,Kspace+1)
427c
428      dimension shp(MAXTOP,maxsh,MAXQPT),
429     &          shgl(MAXTOP,nsd,maxsh,MAXQPT),
430     &          shpb(MAXTOP,maxsh,MAXQPT),
431     &          shglb(MAXTOP,nsd,maxsh,MAXQPT)
432      real*8    rerr(nshg,10)
433c
434c
435c.... *******************>> Element Data Formation <<******************
436c
437c
438c.... set the parameters for flux and surface tension calculations
439c
440c
441      idflx = 0
442      if(idiff >= 1)  idflx= idflx + (nflow-1) * nsd
443      if (isurf == 1) idflx=idflx + nsd
444c
445c.... form the LHS matrices, the residual vector, and the block
446c     diagonal preconditioner
447c
448      call ElmGMRs(y,             ac,            x,
449     &             shp,           shgl,          iBC,
450     &             BC,            shpb,
451     &             shglb,         res,
452     &             rmes,          BDiag,         iper,
453     &             ilwork,        lhsK,          col,
454     &             row,           rerr )
455      call rstat (res, ilwork)
456      if(ntotGM.eq.0) resfrt=zero  !don't let this mess up scaled dB
457      if(myrank.eq.master) then
458            write(*,*)'residual prior to sbd-preconditioning'
459      endif
460c
461
462	call tnanq(res,5, 'res_egmr')
463	call tnanq(BDiag,25, 'bdg_egmr')
464c
465c.... **********************>>    EBE - GMRES    <<********************
466c
467      call timer ('Solver  ')
468c
469c.... ------------------------> Initialization <-----------------------
470c
471c
472c.... LU decompose the block diagonals
473c
474      if (iprec .ne. 0) then
475         call i3LU (BDiag, res,  'LU_Fact ')
476         if (numpe > 1) then
477            call commu (BDiag  , ilwork, nflow*nflow  , 'out')
478         endif
479      endif
480c
481c.... block diagonal precondition residual
482c
483      call i3LU (BDiag, res,  'forward ')
484c
485c Check the residual for divering trend
486c
487	call rstatCheck(res,ilwork,y,ac)
488c
489c.... initialize Dy
490c
491      Dy = zero
492c
493c.... Pre-precondition the LHS mass matrix and set up the sparse
494c     preconditioners
495c
496
497      if(lhs.eq.1) call Spsi3pre (BDiag,    lhsK,  col, row)
498c
499c.... copy res in uBrg(1)
500c
501      uBrg(:,:,1) = res
502c
503c.... calculate norm of residual
504c
505      temp  = res**2
506
507      call sumgat (temp, nflow, summed, ilwork)
508      unorm = sqrt(summed)
509c
510c.... check if GMRES iterations are required
511c
512      iKs    = 0
513      lGMRESs = 0
514c
515c.... if we are down to machine precision, don't bother solving
516c
517      if (unorm .lt. 100.*epsM**2) goto 3000
518c
519c.... set up tolerance of the Hessenberg's problem
520c
521      epsnrm = etol * unorm
522c
523c.... ------------------------>  GMRES Loop  <-------------------------
524c
525c.... loop through GMRES cycles
526c
527      do 2000 mGMRES = 1, nGMRES
528         lGMRESs = mGMRES - 1
529c
530         if (lGMRES .gt. 0) then
531c
532c.... if GMRES restarts are necessary, calculate  R - A x
533c
534c
535c.... right precondition Dy
536c
537            temp = Dy
538
539c
540c.... perform the A x product
541c
542            call SparseAp (iper,ilwork,iBC, col, row, lhsK,  temp)
543c           call tnanq(temp,5, 'q_spAPrs')
544
545c
546c.... periodic nodes have to assemble results to their partners
547c
548            call bc3per (iBC,  temp,  iper, ilwork, nflow)
549c           call tnanq(temp,5, 'q_BCprs')
550c
551c.... subtract A x from residual and calculate the norm
552c
553            temp = res - temp
554            uBrg(:,:,1) = temp
555c
556c.... calculate the norm
557c
558            temp  = temp**2
559            call sumgat (temp, nflow, summed, ilwork)
560            unorm = sqrt(summed)
561c
562c.... flop count
563c
564       !      flops = flops + nflow*nshg+nshg
565c
566         endif
567c
568c.... set up RHS of the Hessenberg's problem
569c
570         call clear (eBrg, Kspace+1)
571         eBrg(1) = unorm
572c
573c.... normalize the first Krylov vector
574c
575         uBrg(:,:,1) = uBrg(:,:,1) / unorm
576c
577c.... loop through GMRES iterations
578c
579         do 1000 iK = 1, Kspace
580            iKs = iK
581
582            uBrg(:,:,iKs+1) = uBrg(:,:,iKs)
583c
584c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
585c
586            call SparseAp (iper, ilwork, iBC,
587     &                     col,  row,    lhsK,
588     &                     uBrg(:,:,iKs+1) )
589c           call tnanq(uBrg(:,:,iKS+1),5, 'q_spAP')
590
591c
592c.... periodic nodes have to assemble results to their partners
593c
594            call bc3per (iBC,  uBrg(:,:,iKs+1),  iper, ilwork, nflow)
595c           call tnanq(uBrg(:,:,iKS+1),5, 'q_bc')
596
597c
598c.... orthogonalize and get the norm
599c
600            do jK = 1, iKs+1
601c
602               if (jK .eq. 1) then
603c
604                  temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector
605                  call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1)
606c
607               else
608c
609c project off jK-1 vector
610c
611                  uBrg(:,:,iKs+1)=uBrg(:,:,iKs+1)-beta * uBrg(:,:,jK-1)
612c
613                  temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector
614                  call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j)
615c
616               endif
617c
618               HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix
619c
620            enddo
621c
622       !      flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp
623c
624c  the last inner product was with what was left of the vector (after
625c  projecting off all of the previous vectors
626c
627        if(beta.le.0) write(*,*) 'beta in solgmr non-positive'
628            unorm           = sqrt(beta)
629            HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band
630c
631c.... normalize the Krylov vector
632c
633            uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov
634c vector
635c
636c.... construct and reduce the Hessenberg Matrix
637c  since there is only one subdiagonal we can use a Givens rotation to
638c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
639c  allows us to check progress of solution and quit when satisfied.  Note
640c  that all future K vects will put a subdiagonal in the next column so
641c  there is no penalty to work ahead as  the rotation for the next vector
642c  will be unaffected by this rotation.
643
644c
645c     H Y = E ========>   R_i H Y = R_i E
646c
647            do jK = 1, iKs-1
648               tmp            =  Rcos(jK) * HBrg(jK,  iKs) +
649     &                           Rsin(jK) * HBrg(jK+1,iKs)
650               HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK,  iKs) +
651     &                           Rcos(jK) * HBrg(jK+1,iKs)
652               HBrg(jK,  iKs) =  tmp
653            enddo
654c
655            tmp            = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2)
656            Rcos(iKs)      = HBrg(iKs,  iKs) / tmp
657            Rsin(iKs)      = HBrg(iKs+1,iKs) / tmp
658            HBrg(iKs,  iKs)= tmp
659            HBrg(iKs+1,iKs)= zero
660c
661c.... rotate eBrg    R_i E
662c
663            tmp        = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1)
664            eBrg(iKs+1)=-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1)
665            eBrg(iKs)  = tmp
666c
667c.... check for convergence
668c
669            ntotGM = ntotGM + 1
670            echeck=abs(eBrg(iKs+1))
671            if (echeck .le. epsnrm.and. iKs .ge. minIters) exit
672c
673c.... end of GMRES iteration loop
674c
675 1000    continue
676c
677c.... ------------------------->   Solution   <------------------------
678c
679c.... if converged or end of Krylov space
680c
681c.... solve for yBrg
682c
683         do jK = iKs, 1, -1
684            yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
685            do lK = 1, jK-1
686               eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
687            enddo
688         enddo
689c
690c.... update Dy
691c
692         do jK = 1, iKs
693            Dy = Dy + yBrg(jK) * uBrg(:,:,jK)
694         enddo
695c
696c.... flop count
697c
698    !      flops = flops + (3*iKs+1)*nflow*nshg
699c
700c.... check for convergence
701c
702        echeck=abs(eBrg(iKs+1))
703        if (echeck .le. epsnrm) exit
704!        if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction',
705!     &  (one-echeck*etol/epsnrm)/(one-etol)*100
706
707c
708c.... end of mGMRES loop
709c
710 2000 continue
711c
712c.... ------------------------>   Converged   <------------------------
713c
714c.... if converged
715c
716 3000 continue
717
718c
719c
720c.... back block-diagonal precondition the results
721c
722      call i3LU (BDiag, Dy, 'backward')
723c
724c
725c.... output the statistics
726c
727      call rstat (res, ilwork)
728
729      if(myrank.eq.master) then
730        if (echeck .le. epsnrm) then
731            write(*,*)
732        else
733            write(*,*)'solver tolerance %satisfaction',
734     &  (one-echeck*etol/epsnrm)/(one-etol)*100
735        endif
736      endif
737c
738c.... stop the timer
739c
740 3002 continue                  ! no solve just res.
741      call timer ('Back    ')
742c
743c.... end
744c
745      return
746      end
747
748        subroutine SolGMRSclr(y,       ac,      yold,
749     &                        acold,   EGmasst,
750     &                        x,       elDw,
751     &                        iBC,      BC,
752     &                        rest,     HBrg,     eBrg,
753     &                        yBrg,     Rcos,     Rsin,    iper,
754     &                        ilwork,
755     &                        shp,      shgl,
756     &                        shpb,     shglb,  Dyt)
757c
758c----------------------------------------------------------------------
759c
760c  This is the preconditioned GMRES driver routine.
761c
762c input:
763c  y      (nshg,ndof)           : Y-variables at n+alpha_f
764c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
765c  yold   (nshg,ndof)           : Y-variables at beginning of step
766c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
767c  x      (numnp,nsd)            : node coordinates
768c  iBC    (nshg)                : BC codes
769c  BC     (nshg,ndofBC)         : BC constraint parameters
770c  iper   (nshg)                : periodic nodal information
771c
772c output:
773c  y      (nshg,ndof)           : Y-variables at n+alpha_f
774c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
775c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
776c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
777c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
778c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
779c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
780c output:
781c  rest   (numnp)           : preconditioned residual
782c
783c
784c Zdenek Johan,  Winter 1991.  (Fortran 90)
785c----------------------------------------------------------------------
786c
787        use pointer_data
788
789        include "common.h"
790        include "mpif.h"
791        include "auxmpi.h"
792c
793        dimension y(nshg,ndof),      ac(nshg,ndof),
794     &            yold(nshg,ndof),   acold(nshg,ndof),
795     &            x(numnp,nsd),
796     &            iBC(nshg),         BC(nshg,ndofBC),
797     &            rest(nshg),
798     &            Diag(nshg),
799     &            HBrg(Kspace+1,*),  eBrg(*),
800     &            yBrg(*),           Rcos(*),
801     &            Rsin(*),           ilwork(nlwork),
802     &            iper(nshg),        EGmasst(numel,nshape,nshape)
803c
804        dimension Dyt(nshg),         rmest(nshg),
805     &            tempt(nshg),       Dinv(nshg),
806     &            uBrgt(nshg,Kspace+1)
807c
808        dimension shp(MAXTOP,maxsh,MAXQPT),
809     &            shgl(MAXTOP,nsd,maxsh,MAXQPT),
810     &            shpb(MAXTOP,maxsh,MAXQPT),
811     &            shglb(MAXTOP,nsd,maxsh,MAXQPT)
812        real*8    elDw(numel)
813c
814c.... *******************>> Element Data Formation <<******************
815c
816c
817c.... form the LHS matrices, the residual vector, and the block
818c     diagonal preconditioner
819c
820        call ElmGMRSclr(y,             ac,
821     &                  x,             elDw,      shp,        shgl,
822     &                  iBC,           BC,
823     &                  shpb,          shglb,
824     &                  rest,
825     &                  rmest,         Diag,       iper,
826     &                  ilwork,        EGmasst)
827      call rstat (rest, ilwork)
828      if(ntotGM.eq.0) resfrts=zero  !don't let this mess up scaled dB
829      if(myrank.eq.master) then
830            write(*,*)'residual prior to sbd-preconditioning'
831      endif
832c
833c.... **********************>>    EBE - GMRES    <<********************
834c
835        call timer ('Solver  ')
836c
837c.... ------------------------> Initialization <-----------------------
838c
839c
840      id = isclr+5
841c.... initialize Dy
842c
843        Dyt = zero
844c
845c.... Right preconditioner
846c
847        call i3preSclr(Diag, Dinv, EGmassT, ilwork)
848c
849c Check the residual for divering trend
850c
851
852        call rstatCheckSclr(rest,ilwork,y,ac)
853
854c
855c.... copy rest in uBrgt(1)
856c
857        uBrgt(:,1) = rest
858c
859c.... calculate norm of residual
860c
861        tempt  = rest**2
862
863        call sumgat (tempt, 1, summed, ilwork)
864        unorm = sqrt(summed)
865c
866c.... check if GMRES iterations are required
867c
868        iKss    = 0
869        lGMRESt = 0
870c
871c.... if we are down to machine precision, don't bother solving
872c
873        if (unorm .lt. 100.*epsM**2) goto 3000
874c
875c.... set up tolerance of the Hessenberg's problem
876c
877        epsnrm = etol * unorm
878c
879c.... ------------------------>  GMRES Loop  <-------------------------
880c
881c.... loop through GMRES cycles
882c
883        do 2000 mGMRES = 1, nGMRES
884        lGMRESt = mGMRES - 1
885c
886        if (lGMRESt .gt. 0) then
887c
888c.... if GMRES restarts are necessary, calculate  R - A x
889c
890c
891
892c.... perform the A x product
893c
894           call Au1GMRSclr (EGmasst,  tempt,  ilwork, iper)
895c
896c.... periodic nodes have to assemble results to their partners
897c
898c          call bc3perSclr (iBC,  tempt,  iper)
899c
900c.... subtract A x from residual and calculate the norm
901c
902           tempt = rest - tempt
903           uBrgt(:,1) = tempt
904c
905c.... calculate the norm
906c
907           tempt  = tempt**2
908           call sumgat (tempt, 1, summed, ilwork)
909           unorm = sqrt(summed)
910c
911c.... flop count
912c
913      !      flops = flops + ndof*numnp+numnp
914c
915        endif
916c
917c.... set up RHS of the Hessenberg's problem
918c
919        call clear (eBrg, Kspace+1)
920        call clear (HBrg, Kspace+1)
921        eBrg(1) = unorm
922c
923c.... normalize the first Krylov vector
924c
925        uBrgt(:,1) = uBrgt(:,1) / unorm
926c
927c.... loop through GMRES iterations
928c
929        do 1000 iK = 1, Kspace
930           iKss = iK
931
932           uBrgt(:,iKss+1) = uBrgt(:,iKss)
933
934c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
935c
936           call Au1GMRSclr ( EGmasst, uBrgt(:,iKss+1),  ilwork, iper )
937
938c
939c.... periodic nodes have to assemble results to their partners
940c
941           call bc3perSclr (iBC,  uBrgt(:,iKss+1),  iper)
942c
943c.... orthogonalize and get the norm
944c
945          do jK = 1, iKss+1
946c
947            if (jK .eq. 1) then
948c
949              tempt = uBrgt(:,iKss+1) * uBrgt(:,1)  ! {u_{i+1}*u_1} vector
950              call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_1)
951c
952            else
953c
954c project off jK-1 vector
955c
956          uBrgt(:,iKss+1) = uBrgt(:,iKss+1) - beta * uBrgt(:,jK-1)
957c
958              tempt = uBrgt(:,iKss+1) * uBrgt(:,jK) !{u_{i+1}*u_j} vector
959              call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_j)
960c
961            endif
962c
963            HBrg(jK,iKss) = beta   ! put this in the Hessenberg Matrix
964c
965        enddo
966c
967   !      flops = flops + (3*iKss+1)*ndof*numnp+(iKss+1)*numnp
968c
969c  the last inner product was with what was left of the vector (after
970c  projecting off all of the previous vectors
971c
972        unorm           = sqrt(beta)
973        HBrg(iKss+1,iKss) = unorm   ! this fills the 1 sub diagonal band
974c
975c.... normalize the Krylov vector
976c
977        uBrgt(:,iKss+1) = uBrgt(:,iKss+1) / unorm  ! normalize the next Krylov
978c vector
979c
980c.... construct and reduce the Hessenberg Matrix
981c  since there is only one subdiagonal we can use a Givens rotation to
982c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
983c  allows us to check progress of solution and quit when satisfied.  Note
984c  that all future K vects will put a subdiagonal in the next column so
985c  there is no penalty to work ahead as  the rotation for the next vector
986c  will be unaffected by this rotation.
987
988c
989c     H Y = E ========>   R_i H Y = R_i E
990c
991           do jK = 1, iKss-1
992              tmp            =  Rcos(jK) * HBrg(jK,  iKss) +
993     &                          Rsin(jK) * HBrg(jK+1,iKss)
994              HBrg(jK+1,iKss) = -Rsin(jK) * HBrg(jK,  iKss) +
995     &                          Rcos(jK) * HBrg(jK+1,iKss)
996              HBrg(jK,  iKss) =  tmp
997           enddo
998c
999           tmp        = sqrt(HBrg(iKss,iKss)**2 + HBrg(iKss+1,iKss)**2)
1000           Rcos(iKss) = HBrg(iKss,  iKss) / tmp
1001           Rsin(iKss) = HBrg(iKss+1,iKss) / tmp
1002           HBrg(iKss,  iKss) = tmp
1003           HBrg(iKss+1,iKss) = zero
1004c
1005c.... rotate eBrg    R_i E
1006c
1007           tmp         = Rcos(iKss)*eBrg(iKss) + Rsin(iKss)*eBrg(iKss+1)
1008           eBrg(iKss+1)=-Rsin(iKss)*eBrg(iKss) + Rcos(iKss)*eBrg(iKss+1)
1009           eBrg(iKss)  = tmp
1010c
1011c.... check for convergence
1012c
1013           ercheck=eBrg(iKss+1)
1014           ntotGMs = ntotGMs + 1
1015           if (abs(eBrg(iKss+1)) .le. epsnrm) exit
1016c
1017c.... end of GMRES iteration loop
1018c
1019 1000   continue
1020c
1021c.... ------------------------->   Solution   <------------------------
1022c
1023c.... if converged or end of Krylov space
1024c
1025c.... solve for yBrg
1026c
1027        do jK = iKss, 1, -1
1028           yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
1029           do lK = 1, jK-1
1030              eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
1031           enddo
1032        enddo
1033c
1034c.... update Dy
1035c
1036        do jK = 1, iKss
1037           Dyt = Dyt + yBrg(jK) * uBrgt(:,jK)
1038        enddo
1039c
1040c.... flop count
1041c
1042   !      flops = flops + (3*iKss+1)*ndof*numnp
1043c
1044c.... check for convergence
1045c
1046        if (abs(eBrg(iKss+1)) .le. epsnrm) exit
1047c
1048c.... end of mGMRES loop
1049c
1050 2000 continue
1051c
1052c.... ------------------------>   Converged   <------------------------
1053c
1054c.... if converged
1055c
1056 3000 continue
1057c
1058c.... back precondition the result
1059c
1060      Dyt(:) = Dyt(:) * Dinv(:)
1061c
1062c.... output the statistics
1063c
1064      call rstatSclr(rest, ilwork)
1065c.... stop the timer
1066c
1067 3002 continue                  ! no solve just res.
1068      call timer ('Back    ')
1069c
1070c.... end
1071c
1072      return
1073      end
1074
1075
1076