xref: /phasta/phSolver/compressible/solgmr.f (revision 4d60bba28c1e1f3ca80b42756ae9dcbcd5c4bc48)
1        subroutine SolGMRe (y,         ac,        yold,      acold,
2     &			   x,         iBC,       BC,        EGmass,
3     &                     res,       BDiag,     HBrg,      eBrg,
4     &                     yBrg,      Rcos,      Rsin,      iper,
5     &                     ilwork,    shp,       shgl,      shpb,
6     &                     shglb,     Dy, rerr)
7c
8c----------------------------------------------------------------------
9c
10c  This is the preconditioned GMRES driver routine.
11c
12c input:
13c  y      (nshg,ndof)           : Y-variables at n+alpha_v
14c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
15c  yold   (nshg,ndof)           : Y-variables at beginning of step
16c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
17c  x      (numnp,nsd)            : node coordinates
18c  iBC    (nshg)                : BC codes
19c  BC     (nshg,ndofBC)         : BC constraint parameters
20c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
21c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
22c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
23c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
24c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
25c  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
26c  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
27c
28c output:
29c  res    (nshg,nflow)           : preconditioned residual
30c  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
31c
32c
33c Zdenek Johan,  Winter 1991.  (Fortran 90)
34c----------------------------------------------------------------------
35c
36        use pointer_data
37
38        include "common.h"
39        include "mpif.h"
40        include "auxmpi.h"
41c
42        dimension y(nshg,ndof),             ac(nshg,ndof),
43     &            yold(nshg,ndof),          acold(nshg,ndof),
44     &            x(numnp,nsd),
45     &            iBC(nshg),                BC(nshg,ndofBC),
46     &            res(nshg,nflow),
47     &            BDiag(nshg,nflow,nflow),
48     &            HBrg(Kspace+1,*),         eBrg(*),
49     &            yBrg(*),                  Rcos(*),
50     &            Rsin(*),                  ilwork(nlwork),
51     &            iper(nshg),               EGmass(numel,nedof,nedof)!,
52ctoomuchmem     &            Binv(numel,nedof,nedof)
53c
54        dimension Dy(nshg,nflow),            rmes(nshg,nflow),
55     &            temp(nshg,nflow),
56     &            uBrg(nshg,nflow,Kspace+1)
57c
58        dimension shp(MAXTOP,maxsh,MAXQPT),
59     &            shgl(MAXTOP,nsd,maxsh,MAXQPT),
60     &            shpb(MAXTOP,maxsh,MAXQPT),
61     &            shglb(MAXTOP,nsd,maxsh,MAXQPT)
62      real*8    rerr(nshg,10)
63c
64c.... *******************>> Element Data Formation <<******************
65c
66c
67c.... set the parameters for flux and surface tension calculations
68c
69c
70        idflx = 0
71        if(idiff >= 1)  idflx= idflx + (nflow-1) * nsd
72        if (isurf == 1) idflx=idflx + nsd
73c
74c.... form the LHS matrices, the residual vector, and the block
75c     diagonal preconditioner
76c
77        call ElmGMRe(y,             ac,            x,
78     &               shp,           shgl,          iBC,
79     &               BC,            shpb,
80     &               shglb,         res,
81     &               rmes,          BDiag,         iper,
82     &               ilwork,        EGmass,        rerr )
83c
84c.... **********************>>    EBE - GMRES    <<********************
85c
86        call timer ('Solver  ')
87c
88c.... ------------------------> Initialization <-----------------------
89c
90c
91c.... LU decompose the block diagonals
92c
93        if (iprec .ne. 0)
94     &  call i3LU (BDiag, res,  'LU_Fact ')
95
96c
97c.... block diagonal precondition residual
98c
99        call i3LU (BDiag, res,  'forward ')
100c
101c.... initialize Dy
102c
103        Dy = zero
104c
105c.... Pre-precondition the LHS mass matrix and set up the element
106c     by element preconditioners
107c
108ctoomuchmemory note that Binv is demoted from huge array to just one
109c        real*8 in i3pre because it takes too much memory
110
111        call i3pre (BDiag,    Binv,   EGmass,  ilwork)
112c
113c.... left EBE precondition the residual
114c
115ctoomuchmem        call i3Pcond (Binv,  res,  ilwork,   'L_Pcond ')
116c
117c.... copy res in uBrg(1)
118c
119        uBrg(:,:,1) = res
120c
121c.... calculate norm of residual
122c
123        temp  = res**2
124
125        call sumgat (temp, nflow, summed, ilwork)
126        unorm = sqrt(summed)
127c
128c.... check if GMRES iterations are required
129c
130        iKs    = 0
131        lGMRES = 0
132c
133c.... if we are down to machine precision, don't bother solving
134c
135        if (unorm .lt. 100.*epsM**2) goto 3000
136c
137c.... set up tolerance of the Hessenberg's problem
138c
139        epsnrm = etol * unorm
140c
141c.... ------------------------>  GMRES Loop  <-------------------------
142c
143c.... loop through GMRES cycles
144c
145        do 2000 mGMRES = 1, nGMRES
146        lGMRES = mGMRES - 1
147c
148        if (lGMRES .gt. 0) then
149c
150c.... if GMRES restarts are necessary, calculate  R - A x
151c
152c
153c.... right precondition Dy
154c
155           temp = Dy
156
157ctoomuchmem           call i3Pcond (Binv,  temp,  ilwork,  'R_Pcond ')
158c
159c.... perform the A x product
160c
161           call Au1GMR (EGmass,  temp,  ilwork, iBC,iper)
162c
163c.... periodic nodes have to assemble results to their partners
164c
165           call bc3per (iBC,  temp,  iper, ilwork, nflow)
166c
167c.... left preconditioning
168c
169ctoomuchmem           call i3Pcond (Binv,  temp,  ilwork,  'L_Pcond ')
170c
171c.... subtract A x from residual and calculate the norm
172c
173           temp = res - temp
174           uBrg(:,:,1) = temp
175c
176c.... calculate the norm
177c
178           temp  = temp**2
179           call sumgat (temp, nflow, summed, ilwork)
180           unorm = sqrt(summed)
181c
182c.... flop count
183c
184      !      flops = flops + nflow*nshg+nshg
185c
186        endif
187c
188c.... set up RHS of the Hessenberg's problem
189c
190        call clear (eBrg, Kspace+1)
191        eBrg(1) = unorm
192c
193c.... normalize the first Krylov vector
194c
195        uBrg(:,:,1) = uBrg(:,:,1) / unorm
196c
197c.... loop through GMRES iterations
198c
199        do 1000 iK = 1, Kspace
200           iKs = iK
201
202           uBrg(:,:,iKs+1) = uBrg(:,:,iKs)
203c
204c.... right EBE precondition the LHS ( u_{i+1} <-- inverse(U) u_i )
205c
206ctoomuchmem           call i3Pcond (Binv,  uBrg(:,:,iKs+1), ilwork,  'R_Pcond ')
207c
208c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
209c
210           call Au1GMR ( EGmass, uBrg(:,:,iKs+1),  ilwork, iBC,iper)
211c
212c.... periodic nodes have to assemble results to their partners
213c
214           call bc3per (iBC,  uBrg(:,:,iKs+1),  iper, ilwork, nflow)
215
216c
217c.... left EBE precondition the LHS ( u_{i+1} <-- inverse(L) u_{i+1} )
218c
219ctoomuchmem           call i3Pcond (Binv,  uBrg(:,:,iKs+1), ilwork, 'L_Pcond ')
220c
221c.... orthogonalize and get the norm
222c
223          do jK = 1, iKs+1
224c
225            if (jK .eq. 1) then
226c
227              temp = uBrg(:,:,iKs+1) * uBrg(:,:,1)  ! {u_{i+1}*u_1} vector
228              call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1)
229c
230            else
231c
232c project off jK-1 vector
233c
234              uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) - beta * uBrg(:,:,jK-1)
235c
236              temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector
237              call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j)
238c
239            endif
240c
241            HBrg(jK,iKs) = beta   ! put this in the Hessenberg Matrix
242c
243        enddo
244c
245   !      flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp
246c
247c  the last inner product was with what was left of the vector (after
248c  projecting off all of the previous vectors
249c
250        unorm           = sqrt(beta)
251        HBrg(iKs+1,iKs) = unorm   ! this fills the 1 sub diagonal band
252c
253c.... normalize the Krylov vector
254c
255        uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm  ! normalize the next Krylov
256c vector
257c
258c.... construct and reduce the Hessenberg Matrix
259c  since there is only one subdiagonal we can use a Givens rotation to
260c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
261c  allows us to check progress of solution and quit when satisfied.  Note
262c  that all future K vects will put a subdiagonal in the next column so
263c  there is no penalty to work ahead as  the rotation for the next vector
264c  will be unaffected by this rotation.
265
266c
267c     H Y = E ========>   R_i H Y = R_i E
268c
269           do jK = 1, iKs-1
270              tmp            =  Rcos(jK) * HBrg(jK,  iKs) +
271     &                          Rsin(jK) * HBrg(jK+1,iKs)
272              HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK,  iKs) +
273     &                          Rcos(jK) * HBrg(jK+1,iKs)
274              HBrg(jK,  iKs) =  tmp
275           enddo
276c
277           tmp             = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2)
278           Rcos(iKs)       = HBrg(iKs,  iKs) / tmp
279           Rsin(iKs)       = HBrg(iKs+1,iKs) / tmp
280           HBrg(iKs,  iKs) = tmp
281           HBrg(iKs+1,iKs) = zero
282c
283c.... rotate eBrg    R_i E
284c
285           tmp         = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1)
286           eBrg(iKs+1) =-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1)
287           eBrg(iKs)   = tmp
288c
289c.... check for convergence
290c
291           ntotGM = ntotGM + 1
292           echeck=abs(eBrg(iKs+1))
293           if (echeck .le. epsnrm) exit
294c
295c.... end of GMRES iteration loop
296c
297 1000   continue
298c
299c.... ------------------------->   Solution   <------------------------
300c
301c.... if converged or end of Krylov space
302c
303c.... solve for yBrg
304c
305        do jK = iKs, 1, -1
306           yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
307           do lK = 1, jK-1
308              eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
309           enddo
310        enddo
311c
312c.... update Dy
313c
314        do jK = 1, iKs
315           Dy = Dy + yBrg(jK) * uBrg(:,:,jK)
316        enddo
317c
318c.... flop count
319c
320   !      flops = flops + (3*iKs+1)*nflow*nshg
321c
322c.... check for convergence
323c
324
325        echeck=abs(eBrg(iKs+1))
326        if (echeck .le. epsnrm) exit
327        if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction',
328     &  (one-echeck/unorm)/(one-etol)*100
329c
330c.... end of mGMRES loop
331c
332 2000 continue
333c
334c.... ------------------------>   Converged   <------------------------
335c
336c.... if converged
337c
338 3000 continue
339c
340c.... back EBE precondition the results
341c
342ctoomuchmem      call i3Pcond (Binv,  Dy, ilwork, 'R_Pcond ')
343c
344c.... back block-diagonal precondition the results
345c
346      call i3LU (BDiag, Dy, 'backward')
347c
348c
349c.... output the statistics
350c
351              call rstat (res, ilwork)
352c
353c.... stop the timer
354c
355 3002 continue                  ! no solve just res.
356      call timer ('Back    ')
357c
358c.... end
359c
360      return
361      end
362
363
364
365
366
367      subroutine SolGMRs(y,         ac,        yold,      acold,
368     &			 x,         iBC,       BC,
369     &                   col,       row,       lhsk,
370     &                   res,       BDiag,     HBrg,      eBrg,
371     &                   yBrg,      Rcos,      Rsin,      iper,
372     &                   ilwork,    shp,       shgl,      shpb,
373     &                   shglb,     Dy,        rerr)
374c
375c----------------------------------------------------------------------
376c
377c  This is the preconditioned GMRES driver routine.
378c
379c input:
380c  y      (nshg,ndof)           : Y-variables at n+alpha_v
381c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
382c  yold   (nshg,ndof)           : Y-variables at beginning of step
383c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
384c  x      (numnp,nsd)            : node coordinates
385c  iBC    (nshg)                : BC codes
386c  BC     (nshg,ndofBC)         : BC constraint parameters
387c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
388c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
389c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
390c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
391c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
392c  shp(b) (nen,maxsh,melCat)     : element shape functions (boundary)
393c  shgl(b)(nsd,nen,maxsh,melCat) : local gradients of shape functions
394c
395c output:
396c  res    (nshg,nflow)           : preconditioned residual
397c  BDiag  (nshg,nflow,nflow)      : block-diagonal preconditioner
398c
399c
400c Zdenek Johan,  Winter 1991.  (Fortran 90)
401c----------------------------------------------------------------------
402c
403      use pointer_data
404
405      include "common.h"
406      include "mpif.h"
407      include "auxmpi.h"
408c
409      integer col(nshg+1), row(nnz*nshg)
410      real*8 lhsK(nflow*nflow,nnz_tot)
411
412
413      dimension y(nshg,ndof),             ac(nshg,ndof),
414     &          yold(nshg,ndof),          acold(nshg,ndof),
415     &          x(numnp,nsd),
416     &          iBC(nshg),                BC(nshg,ndofBC),
417     &          res(nshg,nflow),
418     &          BDiag(nshg,nflow,nflow),
419     &          HBrg(Kspace+1,Kspace),    eBrg(Kspace+1),
420     &          yBrg(Kspace),             Rcos(Kspace),
421     &          Rsin(Kspace),             ilwork(nlwork),
422     &          iper(nshg)
423c
424      dimension Dy(nshg,nflow),            rmes(nshg,nflow),
425     &          temp(nshg,nflow),
426     &          uBrg(nshg,nflow,Kspace+1)
427c
428      dimension shp(MAXTOP,maxsh,MAXQPT),
429     &          shgl(MAXTOP,nsd,maxsh,MAXQPT),
430     &          shpb(MAXTOP,maxsh,MAXQPT),
431     &          shglb(MAXTOP,nsd,maxsh,MAXQPT)
432      real*8    rerr(nshg,10)
433c
434c
435c.... *******************>> Element Data Formation <<******************
436c
437c
438c.... set the parameters for flux and surface tension calculations
439c
440c
441      idflx = 0
442      if(idiff >= 1)  idflx= idflx + (nflow-1) * nsd
443      if (isurf == 1) idflx=idflx + nsd
444c
445c.... form the LHS matrices, the residual vector, and the block
446c     diagonal preconditioner
447c
448      call ElmGMRs(y,             ac,            x,
449     &             shp,           shgl,          iBC,
450     &             BC,            shpb,
451     &             shglb,         res,
452     &             rmes,          BDiag,         iper,
453     &             ilwork,        lhsK,          col,
454     &             row,           rerr )
455      call rstat (res, ilwork)
456      if(ntotGM.eq.0) resfrt=zero  !don't let this mess up scaled dB
457      if(myrank.eq.master) then
458            write(*,*)'residual prior to sbd-preconditioning'
459      endif
460c
461
462	call tnanq(res,5, 'res_egmr')
463	call tnanq(BDiag,25, 'bdg_egmr')
464c
465c.... **********************>>    EBE - GMRES    <<********************
466c
467      call timer ('Solver  ')
468c
469c.... ------------------------> Initialization <-----------------------
470c
471c
472c.... LU decompose the block diagonals
473c
474      if (iprec .ne. 0) then
475         call i3LU (BDiag, res,  'LU_Fact ')
476         if (numpe > 1) then
477            call commu (BDiag  , ilwork, nflow*nflow  , 'out')
478         endif
479      endif
480c
481c.... block diagonal precondition residual
482c
483      call i3LU (BDiag, res,  'forward ')
484c
485c Check the residual for divering trend
486c
487	call rstatCheck(res,ilwork,y,ac)
488c
489c.... initialize Dy
490c
491      Dy = zero
492c
493c.... Pre-precondition the LHS mass matrix and set up the sparse
494c     preconditioners
495c
496
497      if(lhs.eq.1) call Spsi3pre (BDiag,    lhsK,  col, row)
498c
499c.... copy res in uBrg(1)
500c
501      uBrg(:,:,1) = res
502c
503c.... calculate norm of residual
504c
505      temp  = res**2
506
507      call sumgat (temp, nflow, summed, ilwork)
508      unorm = sqrt(summed)
509c
510c.... check if GMRES iterations are required
511c
512      iKs    = 0
513      lGMRESs = 0
514c
515c.... if we are down to machine precision, don't bother solving
516c
517      if (unorm .lt. 100.*epsM**2) goto 3000
518c
519c.... set up tolerance of the Hessenberg's problem
520c
521      epsnrm = etol * unorm
522c
523c.... ------------------------>  GMRES Loop  <-------------------------
524c
525c.... loop through GMRES cycles
526c
527      do 2000 mGMRES = 1, nGMRES
528         lGMRESs = mGMRES - 1
529c
530         if (lGMRES .gt. 0) then
531c
532c.... if GMRES restarts are necessary, calculate  R - A x
533c
534c
535c.... right precondition Dy
536c
537            temp = Dy
538
539c
540c.... perform the A x product
541c
542            call SparseAp (iper,ilwork,iBC, col, row, lhsK,  temp)
543c           call tnanq(temp,5, 'q_spAPrs')
544
545c
546c.... periodic nodes have to assemble results to their partners
547c
548            call bc3per (iBC,  temp,  iper, ilwork, nflow)
549c           call tnanq(temp,5, 'q_BCprs')
550c
551c.... subtract A x from residual and calculate the norm
552c
553            temp = res - temp
554            uBrg(:,:,1) = temp
555c
556c.... calculate the norm
557c
558            temp  = temp**2
559            call sumgat (temp, nflow, summed, ilwork)
560            unorm = sqrt(summed)
561c
562c.... flop count
563c
564       !      flops = flops + nflow*nshg+nshg
565c
566         endif
567c
568c.... set up RHS of the Hessenberg's problem
569c
570         call clear (eBrg, Kspace+1)
571         eBrg(1) = unorm
572c
573c.... normalize the first Krylov vector
574c
575         uBrg(:,:,1) = uBrg(:,:,1) / unorm
576c
577c.... loop through GMRES iterations
578c
579         do 1000 iK = 1, Kspace
580            iKs = iK
581
582            uBrg(:,:,iKs+1) = uBrg(:,:,iKs)
583c
584c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
585c
586            call SparseAp (iper, ilwork, iBC,
587     &                     col,  row,    lhsK,
588     &                     uBrg(:,:,iKs+1) )
589c           call tnanq(uBrg(:,:,iKS+1),5, 'q_spAP')
590
591c
592c.... periodic nodes have to assemble results to their partners
593c
594            call bc3per (iBC,  uBrg(:,:,iKs+1),  iper, ilwork, nflow)
595c           call tnanq(uBrg(:,:,iKS+1),5, 'q_bc')
596
597c
598c.... orthogonalize and get the norm
599c
600            do jK = 1, iKs+1
601c
602               if (jK .eq. 1) then
603c
604                  temp = uBrg(:,:,iKs+1) * uBrg(:,:,1) ! {u_{i+1}*u_1} vector
605                  call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_1)
606c
607               else
608c
609c project off jK-1 vector
610c
611                  uBrg(:,:,iKs+1)=uBrg(:,:,iKs+1)-beta * uBrg(:,:,jK-1)
612c
613                  temp = uBrg(:,:,iKs+1) * uBrg(:,:,jK) !{u_{i+1}*u_j} vector
614                  call sumgat (temp, nflow, beta, ilwork) ! sum vector=(u_{i+1},u_j)
615c
616               endif
617c
618               HBrg(jK,iKs) = beta ! put this in the Hessenberg Matrix
619c
620            enddo
621c
622       !      flops = flops + (3*iKs+1)*nflow*numnp+(iKs+1)*numnp
623c
624c  the last inner product was with what was left of the vector (after
625c  projecting off all of the previous vectors
626c
627        if(beta.le.0) write(*,*) 'beta in solgmr non-positive'
628            unorm           = sqrt(beta)
629            HBrg(iKs+1,iKs) = unorm ! this fills the 1 sub diagonal band
630c
631c.... normalize the Krylov vector
632c
633            uBrg(:,:,iKs+1) = uBrg(:,:,iKs+1) / unorm ! normalize the next Krylov
634c vector
635c
636c.... construct and reduce the Hessenberg Matrix
637c  since there is only one subdiagonal we can use a Givens rotation to
638c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
639c  allows us to check progress of solution and quit when satisfied.  Note
640c  that all future K vects will put a subdiagonal in the next column so
641c  there is no penalty to work ahead as  the rotation for the next vector
642c  will be unaffected by this rotation.
643
644c
645c     H Y = E ========>   R_i H Y = R_i E
646c
647            do jK = 1, iKs-1
648               tmp            =  Rcos(jK) * HBrg(jK,  iKs) +
649     &                           Rsin(jK) * HBrg(jK+1,iKs)
650               HBrg(jK+1,iKs) = -Rsin(jK) * HBrg(jK,  iKs) +
651     &                           Rcos(jK) * HBrg(jK+1,iKs)
652               HBrg(jK,  iKs) =  tmp
653            enddo
654c
655            tmp            = sqrt(HBrg(iKs,iKs)**2 + HBrg(iKs+1,iKs)**2)
656            Rcos(iKs)      = HBrg(iKs,  iKs) / tmp
657            Rsin(iKs)      = HBrg(iKs+1,iKs) / tmp
658            HBrg(iKs,  iKs)= tmp
659            HBrg(iKs+1,iKs)= zero
660c
661c.... rotate eBrg    R_i E
662c
663            tmp        = Rcos(iKs) * eBrg(iKs) + Rsin(iKs) * eBrg(iKs+1)
664            eBrg(iKs+1)=-Rsin(iKs) * eBrg(iKs) + Rcos(iKs) * eBrg(iKs+1)
665            eBrg(iKs)  = tmp
666c
667c.... check for convergence
668c
669            ntotGM = ntotGM + 1
670            echeck=abs(eBrg(iKs+1))
671            if (echeck .le. epsnrm.and. iKs .ge. minIters) exit
672c
673c.... end of GMRES iteration loop
674c
675 1000    continue
676c
677c.... ------------------------->   Solution   <------------------------
678c
679c.... if converged or end of Krylov space
680c
681c.... solve for yBrg
682c
683         do jK = iKs, 1, -1
684            yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
685            do lK = 1, jK-1
686               eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
687            enddo
688         enddo
689c
690c.... update Dy
691c
692         do jK = 1, iKs
693            Dy = Dy + yBrg(jK) * uBrg(:,:,jK)
694         enddo
695c
696c.... flop count
697c
698    !      flops = flops + (3*iKs+1)*nflow*nshg
699c
700c.... check for convergence
701c
702        echeck=abs(eBrg(iKs+1))
703        if (echeck .le. epsnrm) exit
704!        if(myrank.eq.master) write(*,*)'solver tolerance %satisfaction',
705!     &  (one-echeck*etol/epsnrm)/(one-etol)*100
706
707c
708c.... end of mGMRES loop
709c
710 2000 continue
711c
712c.... ------------------------>   Converged   <------------------------
713c
714c.... if converged
715c
716 3000 continue
717
718c
719c
720c.... back block-diagonal precondition the results
721c
722      call i3LU (BDiag, Dy, 'backward')
723c
724c
725c.... output the statistics
726c
727      call rstat (res, ilwork)
728
729      if(myrank.eq.master) then
730        if (echeck .le. epsnrm) then
731            write(*,*)
732        else
733            write(*,*)'solver tolerance %satisfaction',
734     &  (one-echeck*etol/epsnrm)/(one-etol)*100
735        endif
736      endif
737c
738c.... stop the timer
739c
740 3002 continue                  ! no solve just res.
741      call timer ('Back    ')
742c
743c.... end
744c
745      return
746      end
747
748        subroutine SolGMRSclr(y,       ac,      yold,
749     &                        acold,   EGmasst,
750     &                        x,       elDw,
751     &                        iBC,      BC,
752     &                        rest,     HBrg,     eBrg,
753     &                        yBrg,     Rcos,     Rsin,    iper,
754     &                        ilwork,
755     &                        shp,      shgl,
756     &                        shpb,     shglb,  Dyt)
757c
758c----------------------------------------------------------------------
759c
760c  This is the preconditioned GMRES driver routine.
761c
762c input:
763c  y      (nshg,ndof)           : Y-variables at n+alpha_f
764c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
765c  yold   (nshg,ndof)           : Y-variables at beginning of step
766c  acold  (nshg,ndof)           : Primvar. accel. variable at begng step
767c  x      (numnp,nsd)            : node coordinates
768c  iBC    (nshg)                : BC codes
769c  BC     (nshg,ndofBC)         : BC constraint parameters
770c  iper   (nshg)                : periodic nodal information
771c
772c output:
773c  y      (nshg,ndof)           : Y-variables at n+alpha_f
774c  ac     (nshg,ndof)           : Primvar. accel. variable n+alpha_m
775c  HBrg   (Kspace+1,Kspace)      : Hessenberg matrix (LHS matrix)
776c  eBrg   (Kspace+1)             : RHS      of Hessenberg minim. problem
777c  yBrg   (Kspace)               : solution of Hessenberg minim. problem
778c  Rcos   (Kspace)               : Rotational cosine of QR algorithm
779c  Rsin   (Kspace)               : Rotational sine   of QR algorithm
780c output:
781c  rest   (numnp)           : preconditioned residual
782c
783c
784c Zdenek Johan,  Winter 1991.  (Fortran 90)
785c----------------------------------------------------------------------
786c
787        use pointer_data
788
789        include "common.h"
790        include "mpif.h"
791        include "auxmpi.h"
792c
793        dimension y(nshg,ndof),      ac(nshg,ndof),
794     &            yold(nshg,ndof),   acold(nshg,ndof),
795     &            x(numnp,nsd),
796     &            iBC(nshg),         BC(nshg,ndofBC),
797     &            rest(nshg),
798     &            Diag(nshg),
799     &            HBrg(Kspace+1,*),  eBrg(*),
800     &            yBrg(*),           Rcos(*),
801     &            Rsin(*),           ilwork(nlwork),
802     &            iper(nshg),        EGmasst(numel,nshape,nshape)
803c
804        dimension Dyt(nshg),         rmest(nshg),
805     &            tempt(nshg),       Dinv(nshg),
806     &            uBrgt(nshg,Kspace+1)
807c
808        dimension shp(MAXTOP,maxsh,MAXQPT),
809     &            shgl(MAXTOP,nsd,maxsh,MAXQPT),
810     &            shpb(MAXTOP,maxsh,MAXQPT),
811     &            shglb(MAXTOP,nsd,maxsh,MAXQPT)
812        real*8    elDw(numel)
813c
814c.... *******************>> Element Data Formation <<******************
815c
816c
817c.... form the LHS matrices, the residual vector, and the block
818c     diagonal preconditioner
819c
820        call ElmGMRSclr(y,             ac,
821     &                  x,             elDw,      shp,        shgl,
822     &                  iBC,           BC,
823     &                  shpb,          shglb,
824     &                  rest,
825     &                  rmest,         Diag,       iper,
826     &                  ilwork,        EGmasst)
827c
828c.... **********************>>    EBE - GMRES    <<********************
829c
830        call timer ('Solver  ')
831c
832c.... ------------------------> Initialization <-----------------------
833c
834c
835      id = isclr+5
836c.... initialize Dy
837c
838        Dyt = zero
839c
840c.... Right preconditioner
841c
842        call i3preSclr(Diag, Dinv, EGmassT, ilwork)
843c
844c Check the residual for divering trend
845c
846
847        call rstatCheckSclr(rest,ilwork,y,ac)
848
849c
850c.... copy rest in uBrgt(1)
851c
852        uBrgt(:,1) = rest
853c
854c.... calculate norm of residual
855c
856        tempt  = rest**2
857
858        call sumgat (tempt, 1, summed, ilwork)
859        unorm = sqrt(summed)
860c
861c.... check if GMRES iterations are required
862c
863        iKss    = 0
864        lGMRESt = 0
865c
866c.... if we are down to machine precision, don't bother solving
867c
868        if (unorm .lt. 100.*epsM**2) goto 3000
869c
870c.... set up tolerance of the Hessenberg's problem
871c
872        epsnrm = etol * unorm
873c
874c.... ------------------------>  GMRES Loop  <-------------------------
875c
876c.... loop through GMRES cycles
877c
878        do 2000 mGMRES = 1, nGMRES
879        lGMRESt = mGMRES - 1
880c
881        if (lGMRESt .gt. 0) then
882c
883c.... if GMRES restarts are necessary, calculate  R - A x
884c
885c
886
887c.... perform the A x product
888c
889           call Au1GMRSclr (EGmasst,  tempt,  ilwork, iper)
890c
891c.... periodic nodes have to assemble results to their partners
892c
893c          call bc3perSclr (iBC,  tempt,  iper)
894c
895c.... subtract A x from residual and calculate the norm
896c
897           tempt = rest - tempt
898           uBrgt(:,1) = tempt
899c
900c.... calculate the norm
901c
902           tempt  = tempt**2
903           call sumgat (tempt, 1, summed, ilwork)
904           unorm = sqrt(summed)
905c
906c.... flop count
907c
908      !      flops = flops + ndof*numnp+numnp
909c
910        endif
911c
912c.... set up RHS of the Hessenberg's problem
913c
914        call clear (eBrg, Kspace+1)
915        call clear (HBrg, Kspace+1)
916        eBrg(1) = unorm
917c
918c.... normalize the first Krylov vector
919c
920        uBrgt(:,1) = uBrgt(:,1) / unorm
921c
922c.... loop through GMRES iterations
923c
924        do 1000 iK = 1, Kspace
925           iKss = iK
926
927           uBrgt(:,iKss+1) = uBrgt(:,iKss)
928
929c.... Au product  ( u_{i+1} <-- EGmass u_{i+1} )
930c
931           call Au1GMRSclr ( EGmasst, uBrgt(:,iKss+1),  ilwork, iper )
932
933c
934c.... periodic nodes have to assemble results to their partners
935c
936           call bc3perSclr (iBC,  uBrgt(:,iKss+1),  iper)
937c
938c.... orthogonalize and get the norm
939c
940          do jK = 1, iKss+1
941c
942            if (jK .eq. 1) then
943c
944              tempt = uBrgt(:,iKss+1) * uBrgt(:,1)  ! {u_{i+1}*u_1} vector
945              call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_1)
946c
947            else
948c
949c project off jK-1 vector
950c
951          uBrgt(:,iKss+1) = uBrgt(:,iKss+1) - beta * uBrgt(:,jK-1)
952c
953              tempt = uBrgt(:,iKss+1) * uBrgt(:,jK) !{u_{i+1}*u_j} vector
954              call sumgat (tempt, 1, beta, ilwork) ! sum vector=(u_{i+1},u_j)
955c
956            endif
957c
958            HBrg(jK,iKss) = beta   ! put this in the Hessenberg Matrix
959c
960        enddo
961c
962   !      flops = flops + (3*iKss+1)*ndof*numnp+(iKss+1)*numnp
963c
964c  the last inner product was with what was left of the vector (after
965c  projecting off all of the previous vectors
966c
967        unorm           = sqrt(beta)
968        HBrg(iKss+1,iKss) = unorm   ! this fills the 1 sub diagonal band
969c
970c.... normalize the Krylov vector
971c
972        uBrgt(:,iKss+1) = uBrgt(:,iKss+1) / unorm  ! normalize the next Krylov
973c vector
974c
975c.... construct and reduce the Hessenberg Matrix
976c  since there is only one subdiagonal we can use a Givens rotation to
977c  rotate off each subdiagonal AS IT IS FORMED.   We do this because it
978c  allows us to check progress of solution and quit when satisfied.  Note
979c  that all future K vects will put a subdiagonal in the next column so
980c  there is no penalty to work ahead as  the rotation for the next vector
981c  will be unaffected by this rotation.
982
983c
984c     H Y = E ========>   R_i H Y = R_i E
985c
986           do jK = 1, iKss-1
987              tmp            =  Rcos(jK) * HBrg(jK,  iKss) +
988     &                          Rsin(jK) * HBrg(jK+1,iKss)
989              HBrg(jK+1,iKss) = -Rsin(jK) * HBrg(jK,  iKss) +
990     &                          Rcos(jK) * HBrg(jK+1,iKss)
991              HBrg(jK,  iKss) =  tmp
992           enddo
993c
994           tmp        = sqrt(HBrg(iKss,iKss)**2 + HBrg(iKss+1,iKss)**2)
995           Rcos(iKss) = HBrg(iKss,  iKss) / tmp
996           Rsin(iKss) = HBrg(iKss+1,iKss) / tmp
997           HBrg(iKss,  iKss) = tmp
998           HBrg(iKss+1,iKss) = zero
999c
1000c.... rotate eBrg    R_i E
1001c
1002           tmp         = Rcos(iKss)*eBrg(iKss) + Rsin(iKss)*eBrg(iKss+1)
1003           eBrg(iKss+1)=-Rsin(iKss)*eBrg(iKss) + Rcos(iKss)*eBrg(iKss+1)
1004           eBrg(iKss)  = tmp
1005c
1006c.... check for convergence
1007c
1008           ercheck=eBrg(iKss+1)
1009           ntotGMs = ntotGMs + 1
1010           if (abs(eBrg(iKss+1)) .le. epsnrm) exit
1011c
1012c.... end of GMRES iteration loop
1013c
1014 1000   continue
1015c
1016c.... ------------------------->   Solution   <------------------------
1017c
1018c.... if converged or end of Krylov space
1019c
1020c.... solve for yBrg
1021c
1022        do jK = iKss, 1, -1
1023           yBrg(jK) = eBrg(jK) / HBrg(jK,jK)
1024           do lK = 1, jK-1
1025              eBrg(lK) = eBrg(lK) - yBrg(jK) * HBrg(lK,jK)
1026           enddo
1027        enddo
1028c
1029c.... update Dy
1030c
1031        do jK = 1, iKss
1032           Dyt = Dyt + yBrg(jK) * uBrgt(:,jK)
1033        enddo
1034c
1035c.... flop count
1036c
1037   !      flops = flops + (3*iKss+1)*ndof*numnp
1038c
1039c.... check for convergence
1040c
1041        if (abs(eBrg(iKss+1)) .le. epsnrm) exit
1042c
1043c.... end of mGMRES loop
1044c
1045 2000 continue
1046c
1047c.... ------------------------>   Converged   <------------------------
1048c
1049c.... if converged
1050c
1051 3000 continue
1052c
1053c.... back precondition the result
1054c
1055      Dyt(:) = Dyt(:) * Dinv(:)
1056c
1057c.... output the statistics
1058c
1059      call rstatSclr(rest, ilwork)
1060c.... stop the timer
1061c
1062 3002 continue                  ! no solve just res.
1063      call timer ('Back    ')
1064c
1065c.... end
1066c
1067      return
1068      end
1069
1070
1071