xref: /phasta/phSolver/incompressible/filters.f (revision 16223cb9c3f88b34f2cb94151b5cf5ffc1aac5e2)
1      subroutine hfilterC (y, x, ien, hfres, shgl, shp, Qwtf)
2
3c...  The filter operator used here uses the generalized box
4c...  kernel
5
6
7      include "common.h"
8
9      dimension y(nshg,5),             hfres(nshg,16)
10      dimension x(numnp,3),            xl(npro,nenl,3)
11      dimension ien(npro,nshl),        yl(npro,nshl,5),
12     &          fresl(npro,16),        WdetJ(npro),
13     &          u1(npro),              u2(npro),
14     &          u3(npro),
15     &          S11(npro),             S22(npro),
16     &          S33(npro),             S12(npro),
17     &          S13(npro),             S23(npro),
18     &          dxdxi(npro,nsd,nsd),   dxidx(npro,nsd,nsd),
19     &          shgl(nsd,nshl,ngauss),  shg(npro,nshl,nsd),
20     &          shp(nshl,ngauss),
21     &          fresli(npro,16),       Qwtf(ngaussf)
22
23      dimension tmp(npro)
24
25      call local (y,      yl,     ien,    5,  'gather  ')
26      call localx (x,      xl,     ien,    3,  'gather  ')
27c
28
29      fresl = zero
30
31c
32      do intp = 1, ngaussf   ! Loop over quadrature points
33
34c  calculate the metrics
35c
36c
37c.... --------------------->  Element Metrics  <-----------------------
38c
39c.... compute the deformation gradient
40c
41         dxdxi = zero
42c
43         do n = 1, nenl
44            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
45            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
46            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
47            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
48            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
49            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
50            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
51            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
52            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
53         enddo
54c
55c.... compute the inverse of deformation gradient
56c
57         dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
58     &        - dxdxi(:,3,2) * dxdxi(:,2,3)
59         dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
60     &        - dxdxi(:,1,2) * dxdxi(:,3,3)
61         dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
62     &        - dxdxi(:,1,3) * dxdxi(:,2,2)
63         tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
64     &        + dxidx(:,1,2) * dxdxi(:,2,1)
65     &        + dxidx(:,1,3) * dxdxi(:,3,1) )
66         dxidx(:,1,1) = dxidx(:,1,1) * tmp
67         dxidx(:,1,2) = dxidx(:,1,2) * tmp
68         dxidx(:,1,3) = dxidx(:,1,3) * tmp
69         dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
70     &        - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
71         dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
72     &        - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
73         dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
74     &        - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
75         dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
76     &        - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
77         dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
78     &        - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
79         dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
80     &        - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
81c
82c        wght=Qwt(lcsyst,intp)  ! may be different now
83         wght=Qwtf(intp)
84         WdetJ(:) = wght / tmp(:)
85
86
87c... compute the gradient of shape functions at the quad. point.
88
89
90      do n = 1,nshl
91        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
92     &              + shgl(2,n,intp) * dxidx(:,2,1)
93     &              + shgl(3,n,intp) * dxidx(:,3,1))
94        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
95     &              + shgl(2,n,intp) * dxidx(:,2,2)
96     &              + shgl(3,n,intp) * dxidx(:,3,2))
97        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
98     &              + shgl(2,n,intp) * dxidx(:,2,3)
99     &              + shgl(3,n,intp) * dxidx(:,3,3))
100      enddo
101
102
103c... compute the velocities and the strain rate tensor at the quad. point
104
105
106         u1  = zero
107         u2  = zero
108         u3  = zero
109         S11 = zero
110         S22 = zero
111         S33 = zero
112         S12 = zero
113         S13 = zero
114         S23 = zero
115         do i=1,nshl
116            u1 = u1 + shp(i,intp)*yl(:,i,2)
117            u2 = u2 + shp(i,intp)*yl(:,i,3)
118            u3 = u3 + shp(i,intp)*yl(:,i,4)
119
120            S11 = S11 + shg(:,i,1)*yl(:,i,2)
121            S22 = S22 + shg(:,i,2)*yl(:,i,3)
122            S33 = S33 + shg(:,i,3)*yl(:,i,4)
123
124            S12 = S12 + shg(:,i,2)*yl(:,i,2)
125     &                       +shg(:,i,1)*yl(:,i,3)
126            S13 = S13 + shg(:,i,3)*yl(:,i,2)
127     &                       +shg(:,i,1)*yl(:,i,4)
128            S23 = S23 + shg(:,i,3)*yl(:,i,3)
129     &                       +shg(:,i,2)*yl(:,i,4)
130         enddo
131         S12 = pt5 * S12
132         S13 = pt5 * S13
133         S23 = pt5 * S23
134
135
136         fresli(:,1) = WdetJ * u1 !G * u1 * WdetJ
137         fresli(:,2) = WdetJ * u2 !G * u2 * WdetJ
138         fresli(:,3) = WdetJ * u3 !G * u3 * WdetJ
139
140         fresli(:,4) = WdetJ * u1 * u1 ! G*u1*u1*WdetJ
141         fresli(:,5) = WdetJ * u2 * u2 ! G*u2*u2*WdetJ
142         fresli(:,6) = WdetJ * u3 * u3 ! G*u3*u3*WdetJ
143         fresli(:,7) = WdetJ * u1 * u2 ! G*u1*u2*WdetJ
144         fresli(:,8) = WdetJ * u1 * u3 ! G*u1*u3*WdetJ
145         fresli(:,9) = WdetJ * u2 * u3 ! G*u2*u3*WdetJ
146
147         fresli(:,10) = S11 * WdetJ ! G*S_{1,1}*WdetJ
148         fresli(:,11) = S22 * WdetJ ! G*S_{2,2}*WdetJ
149         fresli(:,12) = S33 * WdetJ ! G*S_{3,3}*WdetJ
150         fresli(:,13) = S12 * WdetJ ! G*S_{1,1}*WdetJ
151         fresli(:,14) = S13 * WdetJ ! G*S_{1,3}*WdetJ
152         fresli(:,15) = S23 * WdetJ ! G*S_{2,3}*WdetJ
153
154         fresli(:,16) = WdetJ !Integral of filter kernel, G,
155c                                               over the element
156
157         do i = 1, 16           ! Add contribution of each quad. point
158            fresl(:,i) = fresl(:,i) + fresli(:,i)
159         enddo
160
161      enddo                     !end of loop over integration points
162c
163
164      do j = 1,nshl
165      do nel = 1,npro
166        hfres(ien(nel,j),:) = hfres(ien(nel,j),:) + fresl(nel,:)
167      enddo
168      enddo
169
170
171      return
172      end
173
174c... Here, the filter operation (denoted w/ a tilde) uses the generalized
175c... box kernel.
176
177      subroutine twohfilterB (y, x, strnrm, ien, fres,
178     &     hfres, shgl, shp, Qwtf)
179
180      include "common.h"
181
182      dimension y(nshg,ndof),            fres(nshg,33)
183      dimension x(numnp,nsd),            xl(npro,nenl,nsd)
184      dimension ien(npro,nshl),        yl(npro,nshl,ndof),
185     &          fresl(npro,33),        WdetJ(npro),
186     &          u1(npro),              u2(npro),
187     &          u3(npro),              dxdxi(npro,nsd,nsd),
188     &          strnrm(npro,ngauss),    dxidx(npro,nsd,nsd),
189     &          shgl(nsd,nshl,ngauss),       shg(npro,nshl,nsd),
190     &          shp(nshl,ngauss),
191     &          fresli(npro,33),       Qwtf(ngaussf),
192     &          hfres(nshg,16),        hfresl(npro,nshl,16),
193     &          S(npro,nshl),          SS11(npro,nshl),
194     &          SS22(npro,nshl),       SS33(npro,nshl),
195     &          SS12(npro,nshl),       SS13(npro,nshl),
196     &          SS23(npro,nshl),
197     &          S11p(npro),            S22p(npro),
198     &          S33p(npro),            S12p(npro),
199     &          S13p(npro),            S23p(npro)
200
201      dimension tmp(npro)
202
203      call local (y,      yl,     ien,    5,  'gather  ')
204      call localx (x,      xl,     ien,    3,  'gather  ')
205      call local (hfres,  hfresl, ien,   16,  'gather  ')
206
207      S(:,:) = sqrt(
208     &     two*(hfresl(:,:,10)**2 + hfresl(:,:,11)**2 +
209     &     hfresl(:,:,12)**2) + four*(
210     &     hfresl(:,:,13)**2 + hfresl(:,:,14)**2 +
211     &     hfresl(:,:,15)**2) )
212
213      SS11(:,:) = S(:,:)*hfresl(:,:,10)
214      SS22(:,:) = S(:,:)*hfresl(:,:,11)
215      SS33(:,:) = S(:,:)*hfresl(:,:,12)
216      SS12(:,:) = S(:,:)*hfresl(:,:,13)
217      SS13(:,:) = S(:,:)*hfresl(:,:,14)
218      SS23(:,:) = S(:,:)*hfresl(:,:,15)
219
220      fresl = zero
221
222      do intp = 1, ngauss
223
224
225c  calculate the metrics
226c
227c
228c.... --------------------->  Element Metrics  <-----------------------
229c
230c.... compute the deformation gradient
231c
232        dxdxi = zero
233c
234          do n = 1, nenl
235            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
236            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
237            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
238            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
239            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
240            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
241            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
242            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
243            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
244          enddo
245c
246c.... compute the inverse of deformation gradient
247c
248        dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
249     &                 - dxdxi(:,3,2) * dxdxi(:,2,3)
250        dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
251     &                 - dxdxi(:,1,2) * dxdxi(:,3,3)
252        dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
253     &                 - dxdxi(:,1,3) * dxdxi(:,2,2)
254        tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
255     &                       + dxidx(:,1,2) * dxdxi(:,2,1)
256     &                       + dxidx(:,1,3) * dxdxi(:,3,1) )
257        dxidx(:,1,1) = dxidx(:,1,1) * tmp
258        dxidx(:,1,2) = dxidx(:,1,2) * tmp
259        dxidx(:,1,3) = dxidx(:,1,3) * tmp
260        dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
261     &                - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
262        dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
263     &                - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
264        dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
265     &                - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
266        dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
267     &                - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
268        dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
269     &                - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
270        dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
271     &                - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
272c
273        wght=Qwt(lcsyst,intp)  ! may be different now
274c        wght=Qwtf(intp)
275        WdetJ = wght / tmp
276c
277
278      do n = 1,nshl
279        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
280     &              + shgl(2,n,intp) * dxidx(:,2,1)
281     &              + shgl(3,n,intp) * dxidx(:,3,1))
282        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
283     &              + shgl(2,n,intp) * dxidx(:,2,2)
284     &              + shgl(3,n,intp) * dxidx(:,3,2))
285        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
286     &              + shgl(2,n,intp) * dxidx(:,2,3)
287     &              + shgl(3,n,intp) * dxidx(:,3,3))
288      enddo
289
290c... compute filtered quantities at the hat level evaluated at the quad. pt.
291c... This should really
292c... be the bar-hat level, but the bar filter is implicit so we don't
293c... bother to mention it.
294
295      fresli = zero
296      S11p   = zero
297      S22p   = zero
298      S33p   = zero
299      S12p   = zero
300      S13p   = zero
301      S23p   = zero
302
303      do i = 1, nenl
304         fresli(:,1) = fresli(:,1) + shp(i,intp)*hfresl(:,i,1)  ! hat{u1}
305         fresli(:,2) = fresli(:,2) + shp(i,intp)*hfresl(:,i,2)  ! hat{u2}
306         fresli(:,3) = fresli(:,3) + shp(i,intp)*hfresl(:,i,3)  ! hat{u3}
307
308         fresli(:,4) = fresli(:,4) + shp(i,intp)*hfresl(:,i,1)*
309     &        hfresl(:,i,1)     ! hat{u1}*hat{u1}
310         fresli(:,5) = fresli(:,5) + shp(i,intp)*hfresl(:,i,2)*
311     &        hfresl(:,i,2)     ! hat{u2}*hat{u2}
312         fresli(:,6) = fresli(:,6) + shp(i,intp)*hfresl(:,i,3)*
313     &        hfresl(:,i,3)     ! hat{u3}*hat{u3}
314         fresli(:,7) = fresli(:,7) + shp(i,intp)*hfresl(:,i,1)*
315     &        hfresl(:,i,2)     ! hat{u1}*hat{u2}
316         fresli(:,8) = fresli(:,8) + shp(i,intp)*hfresl(:,i,1)*
317     &        hfresl(:,i,3)     ! hat{u1}*hat{u3}
318         fresli(:,9) = fresli(:,9) + shp(i,intp)*hfresl(:,i,2)*
319     &        hfresl(:,i,3)     ! hat{u2}*hat{u3}
320
321         fresli(:,10) = fresli(:,10) + shp(i,intp)*hfresl(:,i,10)  ! hat{S11}
322         fresli(:,11) = fresli(:,11) + shp(i,intp)*hfresl(:,i,11)  ! hat{S22}
323         fresli(:,12) = fresli(:,12) + shp(i,intp)*hfresl(:,i,12)  ! hat{S33}
324         fresli(:,13) = fresli(:,13) + shp(i,intp)*hfresl(:,i,13)  ! hat{S12}
325         fresli(:,14) = fresli(:,14) + shp(i,intp)*hfresl(:,i,14)  ! hat{S13}
326         fresli(:,15) = fresli(:,15) + shp(i,intp)*hfresl(:,i,15)  ! hat{S23}
327
328         S11p = S11p + shg(:,i,1)*hfresl(:,i,1)
329         S22p = S22p + shg(:,i,2)*hfresl(:,i,2)
330         S33p = S33p + shg(:,i,3)*hfresl(:,i,3)
331
332         S12p = S12p + shg(:,i,2)*hfresl(:,i,1) +
333     &        shg(:,i,1)*hfresl(:,i,2)
334         S13p = S13p + shg(:,i,3)*hfresl(:,i,1) +
335     &        shg(:,i,1)*hfresl(:,i,3)
336         S23p = S23p + shg(:,i,3)*hfresl(:,i,2) +
337     &        shg(:,i,2)*hfresl(:,i,3)
338
339      enddo
340
341c... get the strain rate tensor norm at the quad. pt.
342
343c      strnrm(:,intp) = sqrt(
344c     &   two * (fresli(:,10)**2 + fresli(:,11)**2 + fresli(:,12)**2)
345c     &  + four * ( fresli(:,13)**2 + fresli(:,14)**2 +
346c     &    fresli(:,15)**2 ) )
347
348c      strnrm2(:,intp) = zero
349c      do i = 1, nenl
350c         strnrm2(:,intp) = strnrm2(:,intp) + S(:,i)*shp(i,intp)
351c      enddo
352
353c      strnrm3(:,intp) = sqrt(
354c     &     two * (S11p(:)**2 + S22p(:)**2 + S33p(:)**2)
355c     &    + four * ( S12p(:)**2 + S13p(:)**2 + S23p(:)**2) )
356
357c... compute |hat{S}| * hat{Sij}
358
359      do i = 1, nenl
360         fresli(:,16) =fresli(:,16)+shp(i,intp)*SS11(:,i)! |hat{S}|*hat{S11}
361         fresli(:,17) =fresli(:,17)+shp(i,intp)*SS22(:,i)! |hat{S}|*hat{S22}
362         fresli(:,18) =fresli(:,18)+shp(i,intp)*SS33(:,i)! |hat{S}|*hat{S33}
363         fresli(:,19) =fresli(:,19)+shp(i,intp)*SS12(:,i)! |hat{S}|*hat{S12}
364         fresli(:,20) =fresli(:,20)+shp(i,intp)*SS13(:,i)! |hat{S}|*hat{S13}
365         fresli(:,21) =fresli(:,21)+shp(i,intp)*SS23(:,i)! |hat{S}|*hat{S23}
366      enddo
367
368c... multiply fresli by WdetJ so that when we finish looping over
369c... quad. pts. and add the contributions from all the quad. points
370c... we get filtered quantities at the hat-tilde level, secretly the
371c... bar-hat-tilde level.
372
373      do j = 1, 21
374         fresli(:,j) = fresli(:,j)*WdetJ(:)
375      enddo
376
377c... compute volume of box kernel
378
379      fresli(:,22) = WdetJ
380
381c... add contributions from each quad pt to current element
382
383      do i = 1, 22
384         fresl(:,i) = fresl(:,i) + fresli(:,i)
385      enddo
386
387      enddo ! end of loop over integration points
388
389
390c... scatter locally filtered quantities to the global nodes
391
392      do j = 1,nshl
393      do nel = 1,npro
394        fres(ien(nel,j),:) = fres(ien(nel,j),:) + fresl(nel,:)
395      enddo
396      enddo
397
398      return
399      end
400
401c...  The filter operator used here uses the generalized box
402c...  kernel
403
404      subroutine hfilterCC (y, x, ien, hfres, shgl, shp, Qwtf)
405
406      include "common.h"
407
408      dimension y(nshg,5),             hfres(nshg,22)
409      dimension x(numnp,3),            xl(npro,nenl,3)
410      dimension ien(npro,nshl),        yl(npro,nshl,5),
411     &          fresl(npro,22),        WdetJ(npro),
412     &          u1(npro),              u2(npro),
413     &          u3(npro),
414     &          S11(npro),             S22(npro),
415     &          S33(npro),             S12(npro),
416     &          S13(npro),             S23(npro),
417     &          dxdxi(npro,nsd,nsd),   dxidx(npro,nsd,nsd),
418     &          shgl(nsd,nshl,ngauss),  shg(npro,nshl,nsd),
419     &          shp(nshl,ngauss),
420     &          fresli(npro,22),       Qwtf(ngaussf),
421     &          strnrmi(npro)
422
423      dimension tmp(npro)
424
425      call local (y,      yl,     ien,    5,  'gather  ')
426      call localx (x,      xl,     ien,    3,  'gather  ')
427c
428
429      fresl = zero
430
431c
432      do intp = 1, ngaussf   ! Loop over quadrature points
433
434c  calculate the metrics
435c
436c
437c.... --------------------->  Element Metrics  <-----------------------
438c
439c.... compute the deformation gradient
440c
441         dxdxi = zero
442c
443         do n = 1, nenl
444            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
445            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
446            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
447            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
448            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
449            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
450            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
451            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
452            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
453         enddo
454c
455c.... compute the inverse of deformation gradient
456c
457         dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
458     &        - dxdxi(:,3,2) * dxdxi(:,2,3)
459         dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
460     &        - dxdxi(:,1,2) * dxdxi(:,3,3)
461         dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
462     &        - dxdxi(:,1,3) * dxdxi(:,2,2)
463         tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
464     &        + dxidx(:,1,2) * dxdxi(:,2,1)
465     &        + dxidx(:,1,3) * dxdxi(:,3,1) )
466         dxidx(:,1,1) = dxidx(:,1,1) * tmp
467         dxidx(:,1,2) = dxidx(:,1,2) * tmp
468         dxidx(:,1,3) = dxidx(:,1,3) * tmp
469         dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
470     &        - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
471         dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
472     &        - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
473         dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
474     &        - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
475         dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
476     &        - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
477         dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
478     &        - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
479         dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
480     &        - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
481c
482c         wght=Qwt(lcsyst,intp)  ! may be different now
483         wght=Qwtf(intp)
484         WdetJ(:) = wght / tmp(:)
485
486
487c... compute the gradient of shape functions at the quad. point.
488
489
490      do n = 1,nshl
491        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
492     &              + shgl(2,n,intp) * dxidx(:,2,1)
493     &              + shgl(3,n,intp) * dxidx(:,3,1))
494        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
495     &              + shgl(2,n,intp) * dxidx(:,2,2)
496     &              + shgl(3,n,intp) * dxidx(:,3,2))
497        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
498     &              + shgl(2,n,intp) * dxidx(:,2,3)
499     &              + shgl(3,n,intp) * dxidx(:,3,3))
500      enddo
501
502
503c... compute the velocities and the strain rate tensor at the quad. point
504
505
506         u1  = zero
507         u2  = zero
508         u3  = zero
509         S11 = zero
510         S22 = zero
511         S33 = zero
512         S12 = zero
513         S13 = zero
514         S23 = zero
515         do i=1,nshl
516            u1 = u1 + shp(i,intp)*yl(:,i,2)
517            u2 = u2 + shp(i,intp)*yl(:,i,3)
518            u3 = u3 + shp(i,intp)*yl(:,i,4)
519
520            S11 = S11 + shg(:,i,1)*yl(:,i,2)
521            S22 = S22 + shg(:,i,2)*yl(:,i,3)
522            S33 = S33 + shg(:,i,3)*yl(:,i,4)
523
524            S12 = S12 + shg(:,i,2)*yl(:,i,2)
525     &                       +shg(:,i,1)*yl(:,i,3)
526            S13 = S13 + shg(:,i,3)*yl(:,i,2)
527     &                       +shg(:,i,1)*yl(:,i,4)
528            S23 = S23 + shg(:,i,3)*yl(:,i,3)
529     &                       +shg(:,i,2)*yl(:,i,4)
530         enddo
531         S12 = pt5 * S12
532         S13 = pt5 * S13
533         S23 = pt5 * S23
534
535c... Get the strain rate norm at the quad pts
536
537         strnrmi = sqrt( two*(S11**2 + S22**2 + S33**2)
538     &        + four*(S12**2 + S13**2 + S23**2) )
539
540
541c... Multiply quantities to be filtered by the box kernel
542
543         fresli(:,1) = WdetJ * u1 !G * u1 * WdetJ
544         fresli(:,2) = WdetJ * u2 !G * u2 * WdetJ
545         fresli(:,3) = WdetJ * u3 !G * u3 * WdetJ
546
547         fresli(:,4) = WdetJ * u1 * u1 ! G*u1*u1*WdetJ
548         fresli(:,5) = WdetJ * u2 * u2 ! G*u2*u2*WdetJ
549         fresli(:,6) = WdetJ * u3 * u3 ! G*u3*u3*WdetJ
550         fresli(:,7) = WdetJ * u1 * u2 ! G*u1*u2*WdetJ
551         fresli(:,8) = WdetJ * u1 * u3 ! G*u1*u3*WdetJ
552         fresli(:,9) = WdetJ * u2 * u3 ! G*u2*u3*WdetJ
553
554         fresli(:,10) = S11 * WdetJ ! G*S_{1,1}*WdetJ
555         fresli(:,11) = S22 * WdetJ ! G*S_{2,2}*WdetJ
556         fresli(:,12) = S33 * WdetJ ! G*S_{3,3}*WdetJ
557         fresli(:,13) = S12 * WdetJ ! G*S_{1,1}*WdetJ
558         fresli(:,14) = S13 * WdetJ ! G*S_{1,3}*WdetJ
559         fresli(:,15) = S23 * WdetJ ! G*S_{2,3}*WdetJ
560
561         fresli(:,16) = WdetJ !Integral of filter kernel, G,
562c                                               over the element
563
564         fresli(:,17) = S11 * strnrmi * WdetJ
565         fresli(:,18) = S22 * strnrmi * WdetJ
566         fresli(:,19) = S33 * strnrmi * WdetJ
567         fresli(:,20) = S12 * strnrmi * WdetJ
568         fresli(:,21) = S13 * strnrmi * WdetJ
569         fresli(:,22) = S23 * strnrmi * WdetJ
570
571
572         do i = 1, 22           ! Add contribution of each quad. point
573            fresl(:,i) = fresl(:,i) + fresli(:,i)
574         enddo
575
576      enddo                     !end of loop over integration points
577c
578
579      do j = 1,nshl
580      do nel = 1,npro
581        hfres(ien(nel,j),:) = hfres(ien(nel,j),:) + fresl(nel,:)
582      enddo
583      enddo
584
585
586      return
587      end
588
589
590c... Here, the filter operation (denoted w/ a tilde) uses the generalized
591c... box kernel.
592
593      subroutine twohfilterBB (y, x, strnrm, ien, fres,
594     &     hfres, shgl, shp, Qwtf)
595
596      include "common.h"
597
598      dimension y(nshg,ndof),            fres(nshg,33)
599      dimension x(numnp,nsd),            xl(npro,nenl,nsd)
600      dimension ien(npro,nshl),        yl(npro,nshl,ndof),
601     &          fresl(npro,33),        WdetJ(npro),
602     &          u1(npro),              u2(npro),
603     &          u3(npro),              dxdxi(npro,nsd,nsd),
604     &          strnrm(npro,ngauss),    dxidx(npro,nsd,nsd),
605     &          shgl(nsd,nshl,ngauss),       shg(npro,nshl,nsd),
606     &          shp(nshl,ngauss),
607     &          fresli(npro,33),       Qwtf(ngaussf),
608     &          hfres(nshg,22),        hfresl(npro,nshl,22),
609     &          S(npro,nshl),          SS11(npro,nshl),
610     &          SS22(npro,nshl),       SS33(npro,nshl),
611     &          SS12(npro,nshl),       SS13(npro,nshl),
612     &          SS23(npro,nshl)
613
614      dimension tmp(npro)
615
616      call local (y,      yl,     ien,    5,  'gather  ')
617      call localx (x,      xl,     ien,    3,  'gather  ')
618      call local (hfres,  hfresl, ien,   22,  'gather  ')
619
620      S(:,:) = sqrt(
621     &     two*(hfresl(:,:,10)**2 + hfresl(:,:,11)**2 +
622     &     hfresl(:,:,12)**2) + four*(
623     &     hfresl(:,:,13)**2 + hfresl(:,:,14)**2 +
624     &     hfresl(:,:,15)**2) )
625
626      SS11(:,:) = S(:,:)*hfresl(:,:,10)
627      SS22(:,:) = S(:,:)*hfresl(:,:,11)
628      SS33(:,:) = S(:,:)*hfresl(:,:,12)
629      SS12(:,:) = S(:,:)*hfresl(:,:,13)
630      SS13(:,:) = S(:,:)*hfresl(:,:,14)
631      SS23(:,:) = S(:,:)*hfresl(:,:,15)
632
633      fresl = zero
634
635      do intp = 1, ngaussf
636
637
638c  calculate the metrics
639c
640c
641c.... --------------------->  Element Metrics  <-----------------------
642c
643c.... compute the deformation gradient
644c
645        dxdxi = zero
646c
647          do n = 1, nenl
648            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
649            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
650            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
651            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
652            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
653            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
654            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
655            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
656            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
657          enddo
658c
659c.... compute the inverse of deformation gradient
660c
661        dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
662     &                 - dxdxi(:,3,2) * dxdxi(:,2,3)
663        dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
664     &                 - dxdxi(:,1,2) * dxdxi(:,3,3)
665        dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
666     &                 - dxdxi(:,1,3) * dxdxi(:,2,2)
667        tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
668     &                       + dxidx(:,1,2) * dxdxi(:,2,1)
669     &                       + dxidx(:,1,3) * dxdxi(:,3,1) )
670        dxidx(:,1,1) = dxidx(:,1,1) * tmp
671        dxidx(:,1,2) = dxidx(:,1,2) * tmp
672        dxidx(:,1,3) = dxidx(:,1,3) * tmp
673        dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
674     &                - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
675        dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
676     &                - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
677        dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
678     &                - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
679        dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
680     &                - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
681        dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
682     &                - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
683        dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
684     &                - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
685c
686c        wght=Qwt(lcsyst,intp)  ! may be different now
687        wght=Qwtf(intp)
688        WdetJ = wght / tmp
689c
690
691      do n = 1,nshl
692        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
693     &              + shgl(2,n,intp) * dxidx(:,2,1)
694     &              + shgl(3,n,intp) * dxidx(:,3,1))
695        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
696     &              + shgl(2,n,intp) * dxidx(:,2,2)
697     &              + shgl(3,n,intp) * dxidx(:,3,2))
698        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
699     &              + shgl(2,n,intp) * dxidx(:,2,3)
700     &              + shgl(3,n,intp) * dxidx(:,3,3))
701      enddo
702
703c... compute filtered quantities at the hat level evaluated at the quad. pt.
704c... This should really
705c... be the bar-hat level, but the bar filter is implicit so we don't
706c... bother to mention it.
707
708      fresli = zero
709      S11p   = zero
710      S22p   = zero
711      S33p   = zero
712      S12p   = zero
713      S13p   = zero
714      S23p   = zero
715
716      do i = 1, nenl
717         fresli(:,1) = fresli(:,1) + shp(i,intp)*hfresl(:,i,1)  ! hat{u1}
718         fresli(:,2) = fresli(:,2) + shp(i,intp)*hfresl(:,i,2)  ! hat{u2}
719         fresli(:,3) = fresli(:,3) + shp(i,intp)*hfresl(:,i,3)  ! hat{u3}
720
721         fresli(:,4) = fresli(:,4) + shp(i,intp)*hfresl(:,i,4) ! hat{u1*u1}
722         fresli(:,5) = fresli(:,5) + shp(i,intp)*hfresl(:,i,5) ! hat{u2*u2}
723         fresli(:,6) = fresli(:,6) + shp(i,intp)*hfresl(:,i,6) ! hat{u3*u3}
724         fresli(:,7) = fresli(:,7) + shp(i,intp)*hfresl(:,i,7) ! hat{u1*u2}
725         fresli(:,8) = fresli(:,8) + shp(i,intp)*hfresl(:,i,8) ! hat{u1*u3}
726         fresli(:,9) = fresli(:,9) + shp(i,intp)*hfresl(:,i,9) ! hat{u2*u3}
727
728         fresli(:,10) = fresli(:,10) + shp(i,intp)*hfresl(:,i,10)  ! hat{S11}
729         fresli(:,11) = fresli(:,11) + shp(i,intp)*hfresl(:,i,11)  ! hat{S22}
730         fresli(:,12) = fresli(:,12) + shp(i,intp)*hfresl(:,i,12)  ! hat{S33}
731         fresli(:,13) = fresli(:,13) + shp(i,intp)*hfresl(:,i,13)  ! hat{S12}
732         fresli(:,14) = fresli(:,14) + shp(i,intp)*hfresl(:,i,14)  ! hat{S13}
733         fresli(:,15) = fresli(:,15) + shp(i,intp)*hfresl(:,i,15)  ! hat{S23}
734
735         fresli(:,16) = fresli(:,16) +shp(i,intp)*hfresl(:,i,17)! hat{S11*|S|}
736         fresli(:,17) = fresli(:,17) +shp(i,intp)*hfresl(:,i,18)! hat{S22*|S|}
737         fresli(:,18) = fresli(:,18) +shp(i,intp)*hfresl(:,i,19)! hat{S33*|S|}
738         fresli(:,19) = fresli(:,19) +shp(i,intp)*hfresl(:,i,20)! hat{S12*|S|}
739         fresli(:,20) = fresli(:,20) +shp(i,intp)*hfresl(:,i,21)! hat{S13*|S|}
740         fresli(:,21) = fresli(:,21) +shp(i,intp)*hfresl(:,i,22)! hat{S23*|S|}
741
742
743      enddo
744
745c... multiply fresli by WdetJ so that when we finish looping over
746c... quad. pts. and add the contributions from all the quad. points
747c... we get filtered quantities at the hat-tilde level, secretly the
748c... bar-hat-tilde level.
749
750      do j = 1, 21
751         fresli(:,j) = fresli(:,j)*WdetJ(:)
752      enddo
753
754c... compute volume of box kernel
755
756      fresli(:,22) = WdetJ
757
758c... add contributions from each quad pt to current element
759
760      do i = 1, 22
761         fresl(:,i) = fresl(:,i) + fresli(:,i)
762      enddo
763
764      enddo ! end of loop over integration points
765
766
767c... scatter locally filtered quantities to the global nodes
768
769      do j = 1,nshl
770      do nel = 1,npro
771        fres(ien(nel,j),:) = fres(ien(nel,j),:) + fresl(nel,:)
772      enddo
773      enddo
774
775      return
776      end
777
778
779c...  The filter operator used here uses the generalized hat (witch hat)
780c...  kernel
781
782      subroutine hfilterBB (y, x, ien, hfres, shgl, shp, Qwtf)
783
784      include "common.h"
785
786      dimension y(nshg,5),             hfres(nshg,24)
787      dimension x(numnp,3),            xl(npro,nenl,3)
788      dimension ien(npro,nshl),        yl(npro,nshl,5),
789     &          fresl(npro,nshl,24),        WdetJ(npro),
790     &          u1(npro),              u2(npro),
791     &          u3(npro),              rho(npro),
792     &          S11(npro),             S22(npro),
793     &          S33(npro),             S12(npro),
794     &          S13(npro),             S23(npro),
795     &          dxdxi(npro,nsd,nsd),   dxidx(npro,nsd,nsd),
796     &          shgl(nsd,nshl,ngauss),       shg(npro,nshl,nsd),
797     &          shp(nshl,ngauss),
798     &          fresli(npro,nshl,24),   Qwtf(ngaussf),
799     &          strnrmi(npro)
800
801
802      dimension tmp(npro)
803
804      call local (y,      yl,     ien,    5,  'gather  ')
805      call localx (x,      xl,     ien,    3,  'gather  ')
806c
807
808      fresl = zero
809
810c
811      do intp = 1, ngaussf   ! Loop over quadrature points
812
813c  calculate the metrics
814c
815c
816c.... --------------------->  Element Metrics  <-----------------------
817c
818c.... compute the deformation gradient
819c
820         dxdxi = zero
821c
822         do n = 1, nenl
823            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
824            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
825            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
826            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
827            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
828            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
829            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
830            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
831            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
832         enddo
833c
834c.... compute the inverse of deformation gradient
835c
836         dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
837     &        - dxdxi(:,3,2) * dxdxi(:,2,3)
838         dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
839     &        - dxdxi(:,1,2) * dxdxi(:,3,3)
840         dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
841     &        - dxdxi(:,1,3) * dxdxi(:,2,2)
842         tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
843     &        + dxidx(:,1,2) * dxdxi(:,2,1)
844     &        + dxidx(:,1,3) * dxdxi(:,3,1) )
845         dxidx(:,1,1) = dxidx(:,1,1) * tmp
846         dxidx(:,1,2) = dxidx(:,1,2) * tmp
847         dxidx(:,1,3) = dxidx(:,1,3) * tmp
848         dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
849     &        - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
850         dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
851     &        - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
852         dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
853     &        - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
854         dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
855     &        - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
856         dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
857     &        - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
858         dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
859     &        - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
860c
861c        wght=Qwt(lcsyst,intp)  ! may be different now
862         wght=Qwtf(intp)
863         WdetJ(:) = wght / tmp(:)
864
865
866c... compute the gradient of shape functions at the quad. point.
867
868
869      do n = 1,nshl
870        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
871     &              + shgl(2,n,intp) * dxidx(:,2,1)
872     &              + shgl(3,n,intp) * dxidx(:,3,1))
873        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
874     &              + shgl(2,n,intp) * dxidx(:,2,2)
875     &              + shgl(3,n,intp) * dxidx(:,3,2))
876        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
877     &              + shgl(2,n,intp) * dxidx(:,2,3)
878     &              + shgl(3,n,intp) * dxidx(:,3,3))
879      enddo
880
881
882c... compute the velocities and the strain rate tensor at the quad. point
883
884
885         u1  = zero
886         u2  = zero
887         u3  = zero
888         S11 = zero
889         S22 = zero
890         S33 = zero
891         S12 = zero
892         S13 = zero
893         S23 = zero
894         do i=1,nshl
895            u1 = u1 + shp(i,intp)*yl(:,i,2)
896            u2 = u2 + shp(i,intp)*yl(:,i,3)
897            u3 = u3 + shp(i,intp)*yl(:,i,4)
898
899            S11 = S11 + shg(:,i,1)*yl(:,i,2)
900            S22 = S22 + shg(:,i,2)*yl(:,i,3)
901            S33 = S33 + shg(:,i,3)*yl(:,i,4)
902
903            S12 = S12 + shg(:,i,2)*yl(:,i,2)
904     &                       +shg(:,i,1)*yl(:,i,3)
905            S13 = S13 + shg(:,i,3)*yl(:,i,2)
906     &                       +shg(:,i,1)*yl(:,i,4)
907            S23 = S23 + shg(:,i,3)*yl(:,i,3)
908     &                       +shg(:,i,2)*yl(:,i,4)
909         enddo
910         S12 = pt5 * S12
911         S13 = pt5 * S13
912         S23 = pt5 * S23
913
914c... Get the strain rate norm at the quad pts
915
916         strnrmi = sqrt( two*(S11**2 + S22**2 + S33**2)
917     &        + four*(S12**2 + S13**2 + S23**2) )
918
919
920c... Loop over element nodes and multiply u_{i} and S_{i,j} by the
921c... hat kernel and the Jacobian over the current element evaluated at
922c... the current quad. point.
923
924         do i = 1, nenl   ! Loop over element nodes
925
926            fresli(:,i,1) = WdetJ * u1 * shp(i,intp) !G * u1 * WdetJ
927            fresli(:,i,2) = WdetJ * u2 * shp(i,intp) !G * u2 * WdetJ
928            fresli(:,i,3) = WdetJ * u3 * shp(i,intp) !G * u3 * WdetJ
929
930            fresli(:,i,4) = WdetJ * u1 * u1 * shp(i,intp) ! G*u1*u1*WdetJ
931            fresli(:,i,5) = WdetJ * u2 * u2 * shp(i,intp) ! G*u2*u2*WdetJ
932            fresli(:,i,6) = WdetJ * u3 * u3 * shp(i,intp) ! G*u3*u3*WdetJ
933            fresli(:,i,7) = WdetJ * u1 * u2 * shp(i,intp) ! G*u1*u2*WdetJ
934            fresli(:,i,8) = WdetJ * u1 * u3 * shp(i,intp) ! G*u1*u3*WdetJ
935            fresli(:,i,9) = WdetJ * u2 * u3 * shp(i,intp) ! G*u2*u3*WdetJ
936
937            fresli(:,i,10) = S11 * shp(i,intp) * WdetJ ! G*S_{1,1}*WdetJ
938            fresli(:,i,11) = S22 * shp(i,intp) * WdetJ ! G*S_{2,2}*WdetJ
939            fresli(:,i,12) = S33 * shp(i,intp) * WdetJ ! G*S_{3,3}*WdetJ
940            fresli(:,i,13) = S12 * shp(i,intp) * WdetJ ! G*S_{1,1}*WdetJ
941            fresli(:,i,14) = S13 * shp(i,intp) * WdetJ ! G*S_{1,3}*WdetJ
942            fresli(:,i,15) = S23 * shp(i,intp) * WdetJ ! G*S_{2,3}*WdetJ
943
944            fresli(:,i,22) = WdetJ*shp(i,intp) !Integral of filter kernel, G,
945c                                               over the element
946
947c...   Get G*|S|*S_{i,j}*WdetJ
948
949            fresli(:,i,16) = S11 * strnrmi * shp(i,intp) * WdetJ
950            fresli(:,i,17) = S22 * strnrmi * shp(i,intp) * WdetJ
951            fresli(:,i,18) = S33 * strnrmi * shp(i,intp) * WdetJ
952            fresli(:,i,19) = S12 * strnrmi * shp(i,intp) * WdetJ
953            fresli(:,i,20) = S13 * strnrmi * shp(i,intp) * WdetJ
954            fresli(:,i,21) = S23 * strnrmi * shp(i,intp) * WdetJ
955
956            do j = 1, 22 ! Add contribution of each quad. point for each
957c                          element node
958               fresl(:,i,j) = fresl(:,i,j) + fresli(:,i,j)
959            enddo
960
961         enddo                  !end loop over element nodes
962
963      enddo                     !end of loop over integration points
964
965
966      call local (hfres, fresl, ien, 24, 'scatter ')
967
968      return
969      end
970
971      subroutine ediss (y,           ac,         shgl,
972     &                  shp,         iper,       ilwork,
973     &                  nsons,       ifath,      x,
974     &                  iBC,    BC,  xavegt)
975
976
977      use pvsQbi           ! brings in NABI
978      use stats            !
979      use pointer_data     ! brings in the pointers for the blocked arrays
980      use local_mass
981      use rlssave  ! Use the resolved Leonard stresses at the nodes.
982      use quadfilt ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
983c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
984c                    Shpf and shglf are the shape funciotns and their
985c                    gradient evaluated using the quadrature rule desired
986c                    for computing the dmod. Qwt contains the weights of the
987c                    quad. points.
988
989
990
991      include "common.h"
992      include "mpif.h"
993      include "auxmpi.h"
994
995
996      dimension y(nshg,ndof),                  ac(nshg,ndof),
997     &          ifath(nshg),                   nsons(nshg),
998     &          iper(nshg),                    ilwork(nlwork),
999     &          shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
1000     &          x(numnp,3),
1001     &          qres(nshg,nsd*nsd),             rmass(nshg),
1002     &          iBC(nshg),                      BC(nshg,ndofBC),
1003     &          cdelsq(nshg),                   vol(nshg),
1004     &          stress(nshg,9),                 diss(nshg,3),
1005     &          xave(nshg,12),                  xaveg(nfath,12),
1006     &          xavegr(nfath,12),           xavegt(nfath,12),
1007     &          rnum(nfath),                rden(nfath)
1008
1009
1010c.... First let us obtain cdelsq at each node in the domain.
1011c.... We use numNden which lives in the quadfilt module.
1012
1013      rnum(ifath(:)) = numNden(:,1)
1014      rden(ifath(:)) = numNden(:,2)
1015
1016c      if (myrank .eq. master) then
1017c         write(*,*) 'rnum25=', rnum(25), rden(25)
1018c         write(*,*) 'rnum26=', rnum(26), rden(26)
1019c      endif
1020
1021      cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
1022c      cdelsq(:) = zero ! Debugging
1023
1024      if (istep .eq. 0) then
1025         xavegt = zero  ! For averaging dissipations and SUPG stresses
1026      endif
1027
1028        if (idiff==1 .or. idiff==3) then ! global reconstruction of qdiff
1029c
1030c loop over element blocks for the global reconstruction
1031c of the diffusive flux vector, q, and lumped mass matrix, rmass
1032c
1033           qres = zero
1034           rmass = zero
1035
1036           do iblk = 1, nelblk
1037              iel    = lcblk(1,iblk)
1038              lelCat = lcblk(2,iblk)
1039              lcsyst = lcblk(3,iblk)
1040              iorder = lcblk(4,iblk)
1041              nenl   = lcblk(5,iblk) ! no. of vertices per element
1042              nshl   = lcblk(10,iblk)
1043              mattyp = lcblk(7,iblk)
1044              ndofl  = lcblk(8,iblk)
1045              nsymdl = lcblk(9,iblk)
1046              npro   = lcblk(1,iblk+1) - iel
1047              ngauss = nint(lcsyst)
1048c
1049c.... compute and assemble diffusive flux vector residual, qres,
1050c     and lumped mass matrix, rmass
1051
1052              call AsIq (y,                x,
1053     &                   shp(lcsyst,1:nshl,:),
1054     &                   shgl(lcsyst,:,1:nshl,:),
1055     &                   mien(iblk)%p,     mxmudmi(iblk)%p,
1056     &                   qres,             rmass )
1057           enddo
1058
1059c
1060c.... form the diffusive flux approximation
1061c
1062           call qpbc( rmass, qres, iBC, BC, iper, ilwork )
1063c
1064        endif
1065
1066
1067c.... form the SUPG stresses well as dissipation due to eddy viscosity,
1068c...  and SUPG stabilization.
1069
1070
1071        stress = zero
1072        vol    = zero
1073        diss   = zero
1074
1075      do iblk = 1,nelblk
1076        lcsyst = lcblk(3,iblk)
1077        iel  = lcblk(1,iblk) !Element number where this block begins
1078        npro = lcblk(1,iblk+1) - iel
1079        lelCat = lcblk(2,iblk)
1080        nenl = lcblk(5,iblk)
1081        nshl = lcblk(10,iblk)
1082        inum  = iel + npro - 1
1083
1084        ngauss = nint(lcsyst)
1085        ngaussf = nintf(lcsyst)
1086
1087        call SUPGstress (y, ac, x, qres, mien(iblk)%p, mxmudmi(iblk)%p,
1088     &                   cdelsq, shglf(lcsyst,:,1:nshl,:),
1089     &                   shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf),
1090     &                   shgl(lcsyst,:,1:nshl,:), shp(lcsyst,1:nshl,:),
1091     &                   stress, diss, vol)
1092
1093      enddo
1094
1095      if(numpe>1) call commu (stress, ilwork, 9, 'in ')
1096      if(numpe>1) call commu (diss, ilwork, 3, 'in ')
1097      if(numpe>1) call commu (vol, ilwork, 1, 'in ')
1098
1099c
1100c account for periodicity
1101c
1102      do j = 1,nshg
1103        i = iper(j)
1104        if (i .ne. j) then
1105           stress(i,:) = stress(i,:) + stress(j,:)
1106           diss(i,:)   = diss(i,:)   + diss(j,:)
1107           vol(i)      = vol(i)      + vol(j)
1108        endif
1109      enddo
1110
1111      do j = 1,nshg
1112        i = iper(j)
1113        if (i .ne. j) then
1114           stress(j,:) = stress(i,:)
1115           diss(j,:)   = diss(i,:)
1116           vol(j)      = vol(i)
1117        endif
1118      enddo
1119
1120      if(numpe>1) call commu (stress, ilwork, 9, 'out ')
1121      if(numpe>1) call commu (diss, ilwork, 3, 'out ')
1122      if(numpe>1) call commu (vol, ilwork, 1, 'out ')
1123
1124      vol = one / vol
1125      do i = 1, 9
1126         stress(:,i) = stress(:,i)*vol(:)
1127      enddo
1128      do i = 1, 3
1129         diss(:,i) = diss(:,i)*vol(:)
1130      enddo
1131
1132c---------- > Begin averaging dissipations and SUPG stress <--------------
1133
1134      do i = 1, 9
1135         xave(:,i) = stress(:,i)
1136      enddo
1137      xave(:,10) = diss(:,1)
1138      xave(:,11) = diss(:,2)
1139      xave(:,12) = diss(:,3)
1140
1141c  zero on processor periodic nodes so that they will not be added twice
1142        do j = 1,numnp
1143          i = iper(j)
1144          if (i .ne. j) then
1145            xave(j,:) = zero
1146          endif
1147        enddo
1148
1149      if (numpe.gt.1) then
1150
1151         numtask = ilwork(1)
1152         itkbeg = 1
1153
1154c zero the nodes that are "solved" on the other processors
1155         do itask = 1, numtask
1156
1157            iacc   = ilwork (itkbeg + 2)
1158            numseg = ilwork (itkbeg + 4)
1159
1160            if (iacc .eq. 0) then
1161               do is = 1,numseg
1162                  isgbeg = ilwork (itkbeg + 3 + 2*is)
1163                  lenseg = ilwork (itkbeg + 4 + 2*is)
1164                  isgend = isgbeg + lenseg - 1
1165                  xave(isgbeg:isgend,:) = zero
1166               enddo
1167            endif
1168
1169            itkbeg = itkbeg + 4 + 2*numseg
1170
1171         enddo
1172
1173      endif
1174c
1175
1176      xaveg = zero
1177      do i = 1,nshg
1178         xaveg(ifath(i),:) = xaveg(ifath(i),:) + xave(i,:)
1179      enddo
1180
1181      if(numpe .gt. 1)then
1182         call drvAllreduce(xaveg, xavegr,12*nfath)
1183
1184         do m = 1, 12
1185            xavegr(:,m) = xavegr(:,m)/nsons(:)
1186         enddo
1187
1188         if (myrank .eq. master) then
1189            write(*,*)'diss=', xavegt(14,11), xavegr(14,11)
1190         endif
1191
1192         do m = 1, 12
1193            xavegt(:,m) = xavegt(:,m) + xavegr(:,m)
1194         enddo
1195
1196      else
1197
1198         do m = 1, 12
1199            xaveg(:,m) = xaveg(:,m)/nsons(:)
1200         enddo
1201
1202         do m = 1, 12
1203            xavegt(:,m) = xavegt(:,m) + xaveg(:,m)
1204         enddo
1205
1206      endif
1207
1208      if (myrank .eq. master) then
1209         write(*,*)'diss=', xavegt(14,11), xavegr(14,11)
1210      endif
1211
1212      if ( istep .eq. (nstep(1)-1) ) then
1213         if ( myrank .eq. master) then
1214
1215            do i = 1, nfath
1216               write(376,*)xavegt(i,1),xavegt(i,2),xavegt(i,3)
1217               write(377,*)xavegt(i,4),xavegt(i,5),xavegt(i,6)
1218               write(378,*)xavegt(i,7),xavegt(i,8),xavegt(i,9)
1219               write(379,*)xavegt(i,10),xavegt(i,11),xavegt(i,12)
1220            enddo
1221
1222            call flush(376)
1223            call flush(377)
1224            call flush(378)
1225            call flush(379)
1226
1227         endif
1228      endif
1229
1230
1231      return
1232
1233      end
1234
1235
1236      subroutine resSij (y, x, ien, hfres, shgl, shp, Qwtf)
1237
1238      include "common.h"
1239
1240      dimension y(nshg,5),             hfres(nshg,24)
1241      dimension x(numnp,3),            xl(npro,nenl,3)
1242      dimension ien(npro,nshl),        yl(npro,nshl,5),
1243     &          fresl(npro,nshl,24),        WdetJ(npro),
1244     &          u1(npro),              u2(npro),
1245     &          u3(npro),              rho(npro),
1246     &          S11(npro),             S22(npro),
1247     &          S33(npro),             S12(npro),
1248     &          S13(npro),             S23(npro),
1249     &          dxdxi(npro,nsd,nsd),   dxidx(npro,nsd,nsd),
1250     &          shgl(nsd,nshl,ngauss),       shg(npro,nshl,nsd),
1251     &          shp(nshl,ngauss),
1252     &          fresli(npro,nshl,24),   Qwtf(ngaussf),
1253     &          strnrmi(npro)
1254
1255
1256      dimension tmp(npro)
1257
1258      call local (y,      yl,     ien,    5,  'gather  ')
1259      call localx (x,      xl,     ien,    3,  'gather  ')
1260c
1261
1262      fresl = zero
1263
1264c
1265      do intp = 1, ngaussf   ! Loop over quadrature points
1266
1267c  calculate the metrics
1268c
1269c
1270c.... --------------------->  Element Metrics  <-----------------------
1271c
1272c.... compute the deformation gradient
1273c
1274         dxdxi = zero
1275c
1276         do n = 1, nenl
1277            dxdxi(:,1,1) = dxdxi(:,1,1) + xl(:,n,1) * shgl(1,n,intp)
1278            dxdxi(:,1,2) = dxdxi(:,1,2) + xl(:,n,1) * shgl(2,n,intp)
1279            dxdxi(:,1,3) = dxdxi(:,1,3) + xl(:,n,1) * shgl(3,n,intp)
1280            dxdxi(:,2,1) = dxdxi(:,2,1) + xl(:,n,2) * shgl(1,n,intp)
1281            dxdxi(:,2,2) = dxdxi(:,2,2) + xl(:,n,2) * shgl(2,n,intp)
1282            dxdxi(:,2,3) = dxdxi(:,2,3) + xl(:,n,2) * shgl(3,n,intp)
1283            dxdxi(:,3,1) = dxdxi(:,3,1) + xl(:,n,3) * shgl(1,n,intp)
1284            dxdxi(:,3,2) = dxdxi(:,3,2) + xl(:,n,3) * shgl(2,n,intp)
1285            dxdxi(:,3,3) = dxdxi(:,3,3) + xl(:,n,3) * shgl(3,n,intp)
1286         enddo
1287c
1288c.... compute the inverse of deformation gradient
1289c
1290         dxidx(:,1,1) =   dxdxi(:,2,2) * dxdxi(:,3,3)
1291     &        - dxdxi(:,3,2) * dxdxi(:,2,3)
1292         dxidx(:,1,2) =   dxdxi(:,3,2) * dxdxi(:,1,3)
1293     &        - dxdxi(:,1,2) * dxdxi(:,3,3)
1294         dxidx(:,1,3) =   dxdxi(:,1,2) * dxdxi(:,2,3)
1295     &        - dxdxi(:,1,3) * dxdxi(:,2,2)
1296         tmp          = one / ( dxidx(:,1,1) * dxdxi(:,1,1)
1297     &        + dxidx(:,1,2) * dxdxi(:,2,1)
1298     &        + dxidx(:,1,3) * dxdxi(:,3,1) )
1299         dxidx(:,1,1) = dxidx(:,1,1) * tmp
1300         dxidx(:,1,2) = dxidx(:,1,2) * tmp
1301         dxidx(:,1,3) = dxidx(:,1,3) * tmp
1302         dxidx(:,2,1) = (dxdxi(:,2,3) * dxdxi(:,3,1)
1303     &        - dxdxi(:,2,1) * dxdxi(:,3,3)) * tmp
1304         dxidx(:,2,2) = (dxdxi(:,1,1) * dxdxi(:,3,3)
1305     &        - dxdxi(:,3,1) * dxdxi(:,1,3)) * tmp
1306         dxidx(:,2,3) = (dxdxi(:,2,1) * dxdxi(:,1,3)
1307     &        - dxdxi(:,1,1) * dxdxi(:,2,3)) * tmp
1308         dxidx(:,3,1) = (dxdxi(:,2,1) * dxdxi(:,3,2)
1309     &        - dxdxi(:,2,2) * dxdxi(:,3,1)) * tmp
1310         dxidx(:,3,2) = (dxdxi(:,3,1) * dxdxi(:,1,2)
1311     &        - dxdxi(:,1,1) * dxdxi(:,3,2)) * tmp
1312         dxidx(:,3,3) = (dxdxi(:,1,1) * dxdxi(:,2,2)
1313     &        - dxdxi(:,1,2) * dxdxi(:,2,1)) * tmp
1314c
1315c        wght=Qwt(lcsyst,intp)  ! may be different now
1316         wght=Qwtf(intp)
1317         WdetJ(:) = wght / tmp(:)
1318
1319
1320c... compute the gradient of shape functions at the quad. point.
1321
1322
1323      do n = 1,nshl
1324        shg(:,n,1) = (shgl(1,n,intp) * dxidx(:,1,1)
1325     &              + shgl(2,n,intp) * dxidx(:,2,1)
1326     &              + shgl(3,n,intp) * dxidx(:,3,1))
1327        shg(:,n,2) = (shgl(1,n,intp) * dxidx(:,1,2)
1328     &              + shgl(2,n,intp) * dxidx(:,2,2)
1329     &              + shgl(3,n,intp) * dxidx(:,3,2))
1330        shg(:,n,3) = (shgl(1,n,intp) * dxidx(:,1,3)
1331     &              + shgl(2,n,intp) * dxidx(:,2,3)
1332     &              + shgl(3,n,intp) * dxidx(:,3,3))
1333      enddo
1334
1335
1336c... compute the velocities and the strain rate tensor at the quad. point
1337
1338
1339         u1  = zero
1340         u2  = zero
1341         u3  = zero
1342         S11 = zero
1343         S22 = zero
1344         S33 = zero
1345         S12 = zero
1346         S13 = zero
1347         S23 = zero
1348         do i=1,nshl
1349            u1 = u1 + shp(i,intp)*yl(:,i,2)
1350            u2 = u2 + shp(i,intp)*yl(:,i,3)
1351            u3 = u3 + shp(i,intp)*yl(:,i,4)
1352
1353            S11 = S11 + shg(:,i,1)*yl(:,i,2)
1354            S22 = S22 + shg(:,i,2)*yl(:,i,3)
1355            S33 = S33 + shg(:,i,3)*yl(:,i,4)
1356
1357            S12 = S12 + shg(:,i,2)*yl(:,i,2)
1358     &                       +shg(:,i,1)*yl(:,i,3)
1359            S13 = S13 + shg(:,i,3)*yl(:,i,2)
1360     &                       +shg(:,i,1)*yl(:,i,4)
1361            S23 = S23 + shg(:,i,3)*yl(:,i,3)
1362     &                       +shg(:,i,2)*yl(:,i,4)
1363         enddo
1364         S12 = pt5 * S12
1365         S13 = pt5 * S13
1366         S23 = pt5 * S23
1367
1368c... Get the strain rate norm at the quad pts
1369
1370         strnrmi = sqrt( two*(S11**2 + S22**2 + S33**2)
1371     &        + four*(S12**2 + S13**2 + S23**2) )
1372
1373
1374c... Loop over element nodes and multiply u_{i} and S_{i,j} by the
1375c... hat kernel and the Jacobian over the current element evaluated at
1376c... the current quad. point.
1377
1378         do i = 1, nenl   ! Loop over element nodes
1379
1380            fresli(:,i,1) = WdetJ * u1 * shp(i,intp) !G * u1 * WdetJ
1381            fresli(:,i,2) = WdetJ * u2 * shp(i,intp) !G * u2 * WdetJ
1382            fresli(:,i,3) = WdetJ * u3 * shp(i,intp) !G * u3 * WdetJ
1383
1384            fresli(:,i,4) = WdetJ * u1 * u1 * shp(i,intp) ! G*u1*u1*WdetJ
1385            fresli(:,i,5) = WdetJ * u2 * u2 * shp(i,intp) ! G*u2*u2*WdetJ
1386            fresli(:,i,6) = WdetJ * u3 * u3 * shp(i,intp) ! G*u3*u3*WdetJ
1387            fresli(:,i,7) = WdetJ * u1 * u2 * shp(i,intp) ! G*u1*u2*WdetJ
1388            fresli(:,i,8) = WdetJ * u1 * u3 * shp(i,intp) ! G*u1*u3*WdetJ
1389            fresli(:,i,9) = WdetJ * u2 * u3 * shp(i,intp) ! G*u2*u3*WdetJ
1390
1391            fresli(:,i,10) = S11 * shp(i,intp) * WdetJ ! G*S_{1,1}*WdetJ
1392            fresli(:,i,11) = S22 * shp(i,intp) * WdetJ ! G*S_{2,2}*WdetJ
1393            fresli(:,i,12) = S33 * shp(i,intp) * WdetJ ! G*S_{3,3}*WdetJ
1394            fresli(:,i,13) = S12 * shp(i,intp) * WdetJ ! G*S_{1,1}*WdetJ
1395            fresli(:,i,14) = S13 * shp(i,intp) * WdetJ ! G*S_{1,3}*WdetJ
1396            fresli(:,i,15) = S23 * shp(i,intp) * WdetJ ! G*S_{2,3}*WdetJ
1397
1398
1399            fresli(:,i,16) = strnrmi*strnrmi*strnrmi*shp(i,intp)*WdetJ
1400
1401            fresli(:,i,22) = WdetJ*shp(i,intp) !Integral of filter kernel, G,
1402c                                               over the element
1403
1404c...   Get G*|S|*S_{i,j}*WdetJ
1405
1406c            fresli(:,i,16) = S11 * strnrmi * shp(i,intp) * WdetJ
1407            fresli(:,i,17) = S22 * strnrmi * shp(i,intp) * WdetJ
1408            fresli(:,i,18) = S33 * strnrmi * shp(i,intp) * WdetJ
1409            fresli(:,i,19) = S12 * strnrmi * shp(i,intp) * WdetJ
1410            fresli(:,i,20) = S13 * strnrmi * shp(i,intp) * WdetJ
1411            fresli(:,i,21) = S23 * strnrmi * shp(i,intp) * WdetJ
1412
1413            do j = 1, 22 ! Add contribution of each quad. point for each
1414c                          element node
1415               fresl(:,i,j) = fresl(:,i,j) + fresli(:,i,j)
1416            enddo
1417
1418         enddo                  !end loop over element nodes
1419
1420      enddo                     !end of loop over integration points
1421
1422
1423      call local (hfres, fresl, ien, 24, 'scatter ')
1424
1425      return
1426      end
1427
1428
1429
1430	subroutine sparseCG (rhsorig, trx, lhsM, row, col, iper,
1431     &     ilwork, iBC, BC)
1432c
1433c------------------------------------------------------------------------
1434c
1435c  This subroutine uses Conjugate Gradient,
1436c to solve the system of equations.
1437c
1438c Farzin Shakib,  Summer 1987.
1439c------------------------------------------------------------------------
1440c
1441	include "common.h"
1442c
1443	dimension rhsorig(nshg), trx(nshg)
1444c
1445	dimension d(nshg),     p(nshg),
1446     &            q(nshg),     ilwork(nlwork),
1447     &		  Dinv(nshg),  rhs(nshg),
1448     &            pp(nshg),
1449     &            iBC(nshg),
1450     &            BC(nshg)
1451
1452
1453        integer   row(nshg*nnz),         col(nshg+1)
1454	integer   iBCdumb(nshg),         iper(nshg)
1455
1456	real*8 BCdumb(nshg,ndofBC)
1457
1458	real*8	lhsM(nnz*nshg)
1459c
1460	data nitercf / 100 /
1461c
1462c
1463	BCdumb  = one
1464	iBCdumb = 1
1465c
1466	rhs(:)=rhsorig(:)
1467c
1468c.... Calculate the inverse of the diagonal of the mass matrix
1469c
1470c       call CFDinv (Dinv, emass)
1471c
1472c.... Left precondition the mass matrix and the RHS
1473c
1474c	call CFPre (Dinv, emass, rhs)
1475c
1476c Initialize. We have a system Ax=b We have made A as
1477c well conditionedand as we're willing to go.  Since
1478c we used left preconditioning (on the old A and old b),
1479c we don't need to do any back-preconditioning later.
1480c
1481	rr = 0
1482	do n = 1, nshg
1483	   rr  = rr + rhs(n)*rhs(n)
1484	enddo
1485c
1486c  if parallel the above is only this processors part of the dot product.
1487c  get the total result
1488c
1489	   dotTot=zero
1490	   call drvAllreduce(rr,dotTot,1)
1491	   rr=dotTot
1492	rr0 = rr
1493c
1494	trx(:) = zero ! x_{0}=0
1495c                   ! r_{0}=b
1496c
1497c                   ! beta_{1}=0
1498	p(:) = rhs(:) ! p_{1}=r_{0}=b
1499c
1500c.... Iterate
1501c
1502	do iter = 1, nitercf      ! 15  ! nitercf
1503c
1504c.... calculate alpha
1505c
1506	   pp=p   ! make a vector that we can copy masters onto slaves
1507		  ! and thereby make ready for an accurate Ap product
1508
1509	   call commOut(pp, ilwork, 1,iper,iBCdumb,BCdumb)  !slaves= master
1510
1511	   call fLesSparseApSclr(	col,	row,	lhsM,
1512     &					pp,	q,	nshg,
1513     &                                  nnz)
1514
1515	   call commIn(q, ilwork, 1,iper,iBC,BC) ! masters=masters+slaves
1516							 ! slaves zeroed
1517c	   call CFAp (p,  q) ! put Ap product in q
1518
1519c	   if(nump>1) call commu (q, ilwork, 1, 'in ')
1520
1521c	   do j = 1,nshg
1522c	      i = iper(j)
1523c	      if (i .ne. j) then
1524c		 q(i) = q(i) + q(j)
1525c	      endif
1526c	   enddo
1527
1528c	   do j = 1,nshg
1529c	      i = iper(j)
1530c	      if (i .ne. j) then
1531c		 q(j) = zero
1532c	      endif
1533c	   enddo
1534
1535c     Need to zero off-processor slaves as well.
1536
1537c      if (numpe.gt.1 .and. nsons(1).gt.1) then
1538
1539c         numtask = ilwork(1)
1540c         itkbeg = 1
1541
1542c zero the nodes that are "solved" on the other processors
1543
1544c         do itask = 1, numtask
1545
1546c            iacc   = ilwork (itkbeg + 2)
1547c            numseg = ilwork (itkbeg + 4)
1548
1549c            if (iacc .eq. 0) then
1550c               do is = 1,numseg
1551c                  isgbeg = ilwork (itkbeg + 3 + 2*is)
1552c                  lenseg = ilwork (itkbeg + 4 + 2*is)
1553c                  isgend = isgbeg + lenseg - 1
1554c                  q(isgbeg:isgend) = zero
1555c               enddo
1556c           endif
1557
1558c            itkbeg = itkbeg + 4 + 2*numseg
1559
1560c         enddo
1561
1562c	endif
1563
1564
1565
1566	   pap = 0
1567	   do  n = 1, nshg
1568	      pap = pap + p(n) * q(n)
1569	   enddo
1570c
1571c  if parallel the above is only this processors part of the dot product.
1572c  get the total result
1573c
1574           dotTot=zero
1575	   call drvAllreduce(pap,dotTot,1)
1576	   pap=dotTot
1577	   alpha = rr / pap
1578c
1579c.... calculate the next guess
1580c
1581	   trx(:) = trx(:) + alpha * p(:)
1582c
1583c.... calculate the new residual
1584c
1585c
1586	   rhs(:) = rhs(:) - alpha * q(:)
1587	   tmp = rr
1588	   rr = 0
1589	   do n = 1, nshg
1590	      rr = rr + rhs(n)*rhs(n)
1591	   enddo
1592c
1593c  if parallel the above is only this processors part of the dot product.
1594c  get the total result
1595c
1596           dotTot=zero
1597	   call drvAllreduce(rr,dotTot,1)
1598	   rr=dotTot
1599c
1600c.... check for convergence
1601c
1602	   if(rr.lt.100.*epsM**2) goto 6000
1603c
1604c.... calculate a new search direction
1605c
1606	   beta = rr / tmp
1607	   p(:) = rhs(:) + beta * p(:)
1608c
1609c.... end of iteration
1610c
1611	enddo
1612c
1613c.... if converged
1614c
16156000	continue
1616
1617c need a commu(out) on solution (TRX) to get slaves the correct solution AND
1618c on processor slaves = on processor masters
1619
1620	if(numpe>1) call commu (trx, ilwork, 1, 'out ')
1621	trx(:) = trx(iper(:))
1622
1623c
1624	write(*,9000) iter, rr / rr0
1625c	write(16,9000) iter, rr / rr0
1626c
1627c.... return
1628c
1629	return
16309000	format(20x,'  number of iterations:', i10,/,
1631     &	       20x,'    residual reduction:', 2x,e10.2)
1632	end
1633
1634      subroutine stdfdmc (y,      shgl,      shp,
1635     &                   iper,   ilwork,
1636     &                   nsons,  ifath,     x)
1637
1638      use pointer_data
1639
1640      use quadfilt   ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
1641c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
1642c                    Shpf and shglf are the shape funciotns and their
1643c                    gradient evaluated using the quadrature rule desired
1644c                    for computing the dmod. Qwt contains the weights of the
1645c                    quad. points.
1646
1647
1648
1649      include "common.h"
1650      include "mpif.h"
1651      include "auxmpi.h"
1652
1653c
1654      dimension fres(nshg,24),         fwr(nshg),
1655     &          strnrm(nshg),         cdelsq(nshg),
1656     &          cdelsq2(nshg),
1657     &          xnum(nshg),           xden(nshg),
1658     &          xmij(nshg,6),         xlij(nshg,6),
1659     &          xnude(nfath,2),        xnuder(nfath,2),
1660     &          ynude(nfath,6),        ynuder(nfath,6)
1661      dimension ui(nfath,3),           snorm(nfath),
1662     &          uir(nfath,3),          snormr(nfath),
1663     &          xm(nfath,6),           xl(nfath,6),
1664     &          xl1(nfath,6),          xl2(nfath,6),
1665     &          xl1r(nfath,6),         xl2r(nfath,6),
1666     &          xmr(nfath,6),          xlr(nfath,6),
1667     &          nsons(nshg),
1668     &          strl(numel,ngauss)
1669      dimension y(nshg,5),            yold(nshg,5),
1670     &          ifath(nshg),          iper(nshg),
1671     &          ilwork(nlwork),!        xmudmi(numel,ngauss),
1672     &          x(numnp,3),
1673     &          shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
1674     &          xnutf(nfath),         xfac(nshg,5)
1675
1676      character*10 cname
1677      character*30 fname1, fname2, fname3, fname4, fname5, fname6,
1678     &             fname0
1679c
1680c
1681c   setup the weights for time averaging of cdelsq (now in quadfilt module)
1682c
1683      denom=max(1.0d0*(lstep),one)
1684      if(dtavei.lt.0) then
1685         wcur=one/denom
1686      else
1687         wcur=dtavei
1688      endif
1689      whist=1.0-wcur
1690
1691      if (istep .eq. 0) then
1692         xnd      = zero
1693         xmodcomp = zero
1694         xmcomp  = zero
1695         xlcomp  = zero
1696         xl1comp  = zero
1697         xl2comp  = zero
1698         ucomp    = zero
1699         scomp    = zero
1700      endif
1701
1702
1703      fres = zero
1704      yold(:,1)=y(:,4)
1705      yold(:,2:4)=y(:,1:3)
1706c
1707
1708c
1709c  hack in an interesting velocity field (uncomment to test dmod)
1710c
1711c      yold(:,5) = 1.0  ! Debugging
1712c      yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2)
1713c      yold(:,2) = 2.0
1714c      yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2)
1715c      yold(:,3) = 3.0
1716c      yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3)
1717c      yold(:,4) = 4.0
1718c      yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable
1719c                               suitable for the
1720
1721
1722
1723      intrul=intg(1,itseq)
1724      intind=intpt(intrul)
1725
1726      do iblk = 1,nelblk
1727        lcsyst = lcblk(3,iblk)
1728        iel  = lcblk(1,iblk) !Element number where this block begins
1729        npro = lcblk(1,iblk+1) - iel
1730        lelCat = lcblk(2,iblk)
1731        nenl = lcblk(5,iblk)
1732        nshl = lcblk(10,iblk)
1733        inum  = iel + npro - 1
1734
1735        ngauss = nint(lcsyst)
1736        ngaussf = nintf(lcsyst)
1737
1738        call asithf (yold, x, strl(iel:inum,:), mien(iblk)%p, fres,
1739     &               shglf(lcsyst,:,1:nshl,:),
1740     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
1741
1742      enddo
1743c
1744
1745c      if (ngaussf .ne. ngauss) then
1746      do iblk = 1,nelblk
1747        lcsyst = lcblk(3,iblk)
1748        iel  = lcblk(1,iblk) !Element number where this block begins
1749        npro = lcblk(1,iblk+1) - iel
1750        lelCat = lcblk(2,iblk)
1751        nenl = lcblk(5,iblk)
1752        nshl = lcblk(10,iblk)
1753        inum  = iel + npro - 1
1754
1755        ngauss = nint(lcsyst)
1756        ngaussf = nintf(lcsyst)
1757
1758        if (ngaussf .ne. ngauss) then
1759
1760        call getstrl (yold, x,      mien(iblk)%p,
1761     &               strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:),
1762     &               shp(lcsyst,1:nshl,:))
1763
1764        endif
1765
1766      enddo
1767c      endif
1768c
1769c
1770C must fix for abc and dynamic model
1771c      if(iabc==1)   !are there any axisym bc's
1772c     &      call rotabc(res, iBC, BC,nflow, 'in ')
1773c
1774      if(numpe>1) call commu (fres, ilwork, 24, 'in ')
1775c
1776c account for periodicity in filtered variables
1777c
1778      do j = 1,nshg
1779        i = iper(j)
1780        if (i .ne. j) then
1781           fres(i,:) = fres(i,:) + fres(j,:)
1782        endif
1783      enddo
1784      do j = 1,nshg
1785        i = iper(j)
1786        if (i .ne. j) then
1787           fres(j,:) = fres(i,:)
1788        endif
1789      enddo
1790
1791      if(numpe>1)   call commu (fres, ilwork, 24, 'out')
1792
1793      fres(:,23) = one / fres(:,23)
1794      do j = 1,22
1795        fres(:,j) = fres(:,j) * fres(:,23)
1796      enddo
1797c     fres(:,24) = fres(:,24) * fres(:,23)
1798c
1799c.....at this point fres is really all of our filtered quantities
1800c     at the nodes
1801c
1802
1803      strnrm = sqrt(
1804     &  two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2)
1805     &  + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
1806
1807      fwr = fwr1 * fres(:,22) * strnrm
1808
1809      xmij(:,1) = -fwr
1810     &             * fres(:,10) + fres(:,16)
1811      xmij(:,2) = -fwr
1812     &             * fres(:,11) + fres(:,17)
1813      xmij(:,3) = -fwr
1814     &             * fres(:,12) + fres(:,18)
1815
1816      xmij(:,4) = -fwr * fres(:,13) + fres(:,19)
1817      xmij(:,5) = -fwr * fres(:,14) + fres(:,20)
1818      xmij(:,6) = -fwr * fres(:,15) + fres(:,21)
1819
1820      fres(:,22) = one / fres(:,22)
1821
1822      xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1) * fres(:,22)
1823      xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2) * fres(:,22)
1824      xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3) * fres(:,22)
1825      xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2) * fres(:,22)
1826      xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3) * fres(:,22)
1827      xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3) * fres(:,22)
1828
1829      xnum =        xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2)
1830     &                                    + xlij(:,3) * xmij(:,3)
1831     &     + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5)
1832     &                                    + xlij(:,6) * xmij(:,6))
1833      xden =        xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2)
1834     &                                    + xmij(:,3) * xmij(:,3)
1835     &     + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5)
1836     &                                    + xmij(:,6) * xmij(:,6))
1837      xden = two * xden
1838
1839c... For collectection of statistics on dyn. model components
1840
1841      xfac(:,1) = strnrm*strnrm*( fres(:,10)**2 + fres(:,11)**2 +
1842     &     fres(:,12)**2
1843     &     + two*( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
1844
1845      xfac(:,2) = strnrm*( xlij(:,1)*fres(:,10) + xlij(:,2)*fres(:,11)
1846     &     + xlij(:,3)*fres(:,12) +
1847     &     two*(xlij(:,4)*fres(:,13) + xlij(:,5)*fres(:,14) +
1848     &     xlij(:,6)*fres(:,15)) )
1849
1850      xfac(:,3) = strnrm*( fres(:,10)*fres(:,16) + fres(:,11)*fres(:,17)
1851     &     + fres(:,12)*fres(:,18) +
1852     &     two*(fres(:,13)*fres(:,19) + fres(:,14)*fres(:,20) +
1853     &     fres(:,15)*fres(:,21)) )
1854
1855      xfac(:,4) = xlij(:,1)*fres(:,16) + xlij(:,2)*fres(:,17)
1856     &     + xlij(:,3)*fres(:,18) +
1857     &     two*(xlij(:,4)*fres(:,19) + xlij(:,5)*fres(:,20) +
1858     &     xlij(:,6)*fres(:,21))
1859
1860      xfac(:,5) = fres(:,16)*fres(:,16) + fres(:,17)*fres(:,17)
1861     &     + fres(:,18)*fres(:,18) +
1862     &     two*(fres(:,19)*fres(:,19) + fres(:,20)*fres(:,20) +
1863     &     fres(:,21)*fres(:,21))
1864
1865c  zero on processor periodic nodes so that they will not be added twice
1866        do j = 1,numnp
1867          i = iper(j)
1868          if (i .ne. j) then
1869            xnum(j) = zero
1870            xden(j) = zero
1871            xfac(j,:) = zero
1872            xmij(j,:) = zero
1873            xlij(j,:) = zero
1874            fres(j,:) = zero
1875            strnrm(j) = zero
1876          endif
1877        enddo
1878
1879      if (numpe.gt.1) then
1880
1881         numtask = ilwork(1)
1882         itkbeg = 1
1883
1884c zero the nodes that are "solved" on the other processors
1885         do itask = 1, numtask
1886
1887            iacc   = ilwork (itkbeg + 2)
1888            numseg = ilwork (itkbeg + 4)
1889
1890            if (iacc .eq. 0) then
1891               do is = 1,numseg
1892                  isgbeg = ilwork (itkbeg + 3 + 2*is)
1893                  lenseg = ilwork (itkbeg + 4 + 2*is)
1894                  isgend = isgbeg + lenseg - 1
1895                  xnum(isgbeg:isgend) = zero
1896                  xden(isgbeg:isgend) = zero
1897                  strnrm(isgbeg:isgend) = zero
1898                  xfac(isgbeg:isgend,:) = zero
1899                  xmij(isgbeg:isgend,:) = zero
1900                  xlij(isgbeg:isgend,:) = zero
1901                  fres(isgbeg:isgend,:) = zero
1902               enddo
1903            endif
1904
1905            itkbeg = itkbeg + 4 + 2*numseg
1906
1907         enddo
1908
1909      endif
1910c
1911c Description of arrays.   Each processor has an array of length equal
1912c to the total number of fathers times 2 xnude(nfathers,2). One to collect
1913c the numerator and one to collect the denominator.  There is also an array
1914c of length nshg on each processor which tells the father number of each
1915c on processor node, ifath(nnshg).  Finally, there is an arry of length
1916c nfathers to tell the total (on all processors combined) number of sons
1917c for each father.
1918c
1919c  Now loop over nodes and accumlate the numerator and the denominator
1920c  to the father nodes.  Only on processor addition at this point.
1921c  Note that serrogate fathers are collect some for the case where some
1922c  sons are on another processor
1923c
1924      xnude = zero
1925      ynude = zero
1926      xm    = zero
1927      xl    = zero
1928      xl1   = zero
1929      xl2   = zero
1930      ui    = zero
1931      snorm = zero
1932
1933      do i = 1,nshg
1934         xnude(ifath(i),1) = xnude(ifath(i),1) + xnum(i)
1935         xnude(ifath(i),2) = xnude(ifath(i),2) + xden(i)
1936
1937         ynude(ifath(i),1) = ynude(ifath(i),1) + xfac(i,1)
1938         ynude(ifath(i),2) = ynude(ifath(i),2) + xfac(i,2)
1939         ynude(ifath(i),3) = ynude(ifath(i),3) + xfac(i,3)
1940         ynude(ifath(i),4) = ynude(ifath(i),4) + xfac(i,4)
1941         ynude(ifath(i),5) = ynude(ifath(i),5) + xfac(i,5)
1942
1943         xm(ifath(i),1) = xm(ifath(i),1) + xmij(i,1)
1944         xm(ifath(i),2) = xm(ifath(i),2) + xmij(i,2)
1945         xm(ifath(i),3) = xm(ifath(i),3) + xmij(i,3)
1946         xm(ifath(i),4) = xm(ifath(i),4) + xmij(i,4)
1947         xm(ifath(i),5) = xm(ifath(i),5) + xmij(i,5)
1948         xm(ifath(i),6) = xm(ifath(i),6) + xmij(i,6)
1949
1950         xl(ifath(i),1) = xl(ifath(i),1) + xlij(i,1)
1951         xl(ifath(i),2) = xl(ifath(i),2) + xlij(i,2)
1952         xl(ifath(i),3) = xl(ifath(i),3) + xlij(i,3)
1953         xl(ifath(i),4) = xl(ifath(i),4) + xlij(i,4)
1954         xl(ifath(i),5) = xl(ifath(i),5) + xlij(i,5)
1955         xl(ifath(i),6) = xl(ifath(i),6) + xlij(i,6)
1956
1957         xl1(ifath(i),1) = xl1(ifath(i),1) + fres(i,4)
1958         xl1(ifath(i),2) = xl1(ifath(i),2) + fres(i,5)
1959         xl1(ifath(i),3) = xl1(ifath(i),3) + fres(i,6)
1960         xl1(ifath(i),4) = xl1(ifath(i),4) + fres(i,7)
1961         xl1(ifath(i),5) = xl1(ifath(i),5) + fres(i,8)
1962         xl1(ifath(i),6) = xl1(ifath(i),6) + fres(i,9)
1963
1964         xl2(ifath(i),1) = xl2(ifath(i),1) + fres(i,1)*fres(i,1)
1965         xl2(ifath(i),2) = xl2(ifath(i),2) + fres(i,2)*fres(i,2)
1966         xl2(ifath(i),3) = xl2(ifath(i),3) + fres(i,3)*fres(i,3)
1967         xl2(ifath(i),4) = xl2(ifath(i),4) + fres(i,1)*fres(i,2)
1968         xl2(ifath(i),5) = xl2(ifath(i),5) + fres(i,1)*fres(i,3)
1969         xl2(ifath(i),6) = xl2(ifath(i),6) + fres(i,2)*fres(i,3)
1970
1971         ui(ifath(i),1) = ui(ifath(i),1) + fres(i,1)
1972         ui(ifath(i),2) = ui(ifath(i),2) + fres(i,2)
1973         ui(ifath(i),3) = ui(ifath(i),3) + fres(i,3)
1974
1975         snorm(ifath(i)) = snorm(ifath(i)) + strnrm(i)
1976
1977      enddo
1978
1979c
1980c Now  the true fathers and serrogates combine results and update
1981c each other.
1982c
1983      if(numpe .gt. 1)then
1984         call drvAllreduce(xnude, xnuder,2*nfath)
1985         call drvAllreduce(ynude, ynuder,6*nfath)
1986         call drvAllreduce(xm, xmr,6*nfath)
1987         call drvAllreduce(xl, xlr,6*nfath)
1988         call drvAllreduce(xl1, xl1r,6*nfath)
1989         call drvAllreduce(xl2, xl2r,6*nfath)
1990         call drvAllreduce(ui, uir,3*nfath)
1991         call drvAllreduce(snorm, snormr,nfath)
1992
1993         do i = 1, nfath
1994            ynuder(i,6) = ( ynuder(i,4) - fwr1*ynuder(i,2) ) /
1995     &           ( two*ynuder(i,5) - four*fwr1*ynuder(i,3)
1996     &           + two*fwr1*fwr1*ynuder(i,1) )
1997         enddo
1998
1999         cdelsq2(:) = ynuder(ifath(:),6)  ! For comparison w/ cdelsq
2000c
2001c  xnude is the sum of the sons for each father on this processor
2002c
2003c  xnuder is the sum of the sons for each father on all processor combined
2004c  (the same as if we had not partitioned the mesh for each processor)
2005c
2006c   For each father we have precomputed the number of sons (including
2007c   the sons off processor).
2008c
2009c   Now divide by number of sons to get the average (not really necessary
2010c   for dynamic model since ratio will cancel nsons at each father)
2011c
2012         xnuder(:,1) = xnuder(:,1) / nsons(:)
2013         xnuder(:,2) = xnuder(:,2) / nsons(:)
2014
2015         do m = 1, 5
2016         ynuder(:,m) = ynuder(:,m)/nsons(:)
2017         enddo
2018         do m = 1,6
2019         xmr(:,m) = xmr(:,m)/nsons(:)
2020         xlr(:,m) = xlr(:,m)/nsons(:)
2021         xl1r(:,m) = xl1r(:,m)/nsons(:)
2022         xl2r(:,m) = xl2r(:,m)/nsons(:)
2023         enddo
2024
2025         uir(:,1) = uir(:,1)/nsons(:)
2026         uir(:,2) = uir(:,2)/nsons(:)
2027         uir(:,3) = uir(:,3)/nsons(:)
2028
2029         snormr(:) = snormr(:)/nsons(:)
2030c
2031cc  the next line is c \Delta^2
2032cc
2033cc         xnuder(:,1) = xnuder(:,1) / (xnuder(:,2) + 1.d-09)
2034cc         do i = 1,nshg
2035cc            cdelsq(i) = xnuder(ifath(i),1)
2036cc         enddo
2037
2038            numNden(:,1) = whist*numNden(:,1)+wcur*xnuder(ifath(:),1)
2039            numNden(:,2) = whist*numNden(:,2)+wcur*xnuder(ifath(:),2)
2040            cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
2041
2042c            cdelsq(:) = xnuder(ifath(:),1)/(xnuder(ifath(:),2)+1.d-09)
2043
2044            xnd(:,1) = xnd(:,1) + xnuder(:,1)
2045            xnd(:,2) = xnd(:,2) + xnuder(:,2)
2046
2047            xmodcomp(:,1) = xmodcomp(:,1)+ynuder(:,1)
2048            xmodcomp(:,2) = xmodcomp(:,2)+ynuder(:,2)
2049            xmodcomp(:,3) = xmodcomp(:,3)+ynuder(:,3)
2050            xmodcomp(:,4) = xmodcomp(:,4)+ynuder(:,4)
2051            xmodcomp(:,5) = xmodcomp(:,5)+ynuder(:,5)
2052
2053            xmcomp(:,:) = xmcomp(:,:)+xmr(:,:)
2054            xlcomp(:,:) = xlcomp(:,:)+xlr(:,:)
2055
2056            xl1comp(:,:) = xl1comp(:,:)+xl1r(:,:)
2057            xl2comp(:,:) = xl2comp(:,:)+xl2r(:,:)
2058
2059            ucomp(:,:) = ucomp(:,:)+uir(:,:)
2060            u1 = uir(32,1)
2061            scomp(:)   = scomp(:)+snormr(:)
2062
2063      else
2064
2065         xnude(:,1) = xnude(:,1)/nsons(:)
2066         xnude(:,2) = xnude(:,2)/nsons(:)
2067
2068         do m = 1, 5
2069         ynude(:,m) = ynude(:,m)/nsons(:)
2070         enddo
2071         do m = 1,6
2072         xm(:,m) = xm(:,m)/nsons(:)
2073         xl(:,m) = xl(:,m)/nsons(:)
2074         xl1(:,m) = xl1(:,m)/nsons(:)
2075         xl2(:,m) = xl2(:,m)/nsons(:)
2076         enddo
2077
2078         ui(:,1) = ui(:,1)/nsons(:)
2079         ui(:,2) = ui(:,2)/nsons(:)
2080         ui(:,3) = ui(:,3)/nsons(:)
2081
2082         snorm(:) = snorm(:)/nsons(:)
2083
2084c
2085c     the next line is c \Delta^2, not nu_T but we want to save the
2086c     memory
2087c
2088
2089cc         xnude(:,1) = xnude(:,1) / (xnude(:,2) + 1.d-09)
2090cc        do i = 1,nshg
2091cc            cdelsq(i) = xnude(ifath(i),1)
2092cc         enddo
2093cc      endif
2094
2095         do i = 1, nfath
2096            ynude(i,6) = ( ynude(i,4) - fwr1*ynude(i,2) ) /
2097     &           ( two*ynude(i,5) - four*fwr1*ynude(i,3)
2098     &           + fwr1*fwr1*ynude(i,1) )
2099         enddo
2100
2101            numNden(:,1) = whist*numNden(:,1)+wcur*xnude(ifath(:),1)
2102            numNden(:,2) = whist*numNden(:,2)+wcur*xnude(ifath(:),2)
2103
2104            xnd(:,1) = xnd(:,1)+xnude(:,1)
2105            xnd(:,2) = xnd(:,2)+xnude(:,2)
2106
2107            cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
2108
2109c            cdelsq(:) = xnude(ifath(:),1)/(xnude(ifath(:),2))!+1.d-09)
2110
2111
2112          cdelsq2(:) = ynude(ifath(:),6)  ! For comparison w/ cdelsq
2113
2114            xmodcomp(:,1) = xmodcomp(:,1)+ynude(:,1)
2115            xmodcomp(:,2) = xmodcomp(:,2)+ynude(:,2)
2116            xmodcomp(:,3) = xmodcomp(:,3)+ynude(:,3)
2117            xmodcomp(:,4) = xmodcomp(:,4)+ynude(:,4)
2118            xmodcomp(:,5) = xmodcomp(:,5)+ynude(:,5)
2119
2120            xmcomp(:,:) = xmcomp(:,:)+xm(:,:)
2121            xlcomp(:,:) = xlcomp(:,:)+xl(:,:)
2122
2123            xl1comp(:,:) = xl1comp(:,:)+xl1(:,:)
2124            xl2comp(:,:) = xl2comp(:,:)+xl2(:,:)
2125
2126            ucomp(:,:) = ucomp(:,:)+ui(:,:)
2127            scomp(:)   = scomp(:)+snorm(:)
2128
2129         endif
2130
2131c         do i = 1, nfath
2132c            xmodcomp(i,:) = xmodcomp(i,:)/nsons(i)
2133c            xmcomp(i,:) = xmcomp(i,:)/nsons(i)
2134c            xlcomp(i,:) = xlcomp(i,:)/nsons(i)
2135c            xl2comp(i,:) = xl2comp(i,:)/nsons(i)
2136c            xl1comp(i,:) = xl1comp(i,:)/nsons(i)
2137c            xnd(i,:) = xnd(i,:)/nsons(i)
2138c            scomp(i) = scomp(i)/nsons(i)
2139c            ucomp(i,:) = ucomp(i,:)/nsons(i)
2140c         enddo
2141
2142         if (myrank .eq. master) then
2143            write(*,*)'istep, nstep=', istep, nstep(1)
2144         endif
2145
2146         if ( istep .eq. (nstep(1)-1) ) then
2147         if ( myrank .eq. master) then
2148
2149            do i = 1, nfath
2150            write(365,*)xmodcomp(i,1),xmodcomp(i,2),xmodcomp(i,3),
2151     &              xmodcomp(i,4),xmodcomp(i,5)
2152
2153            write(366,*)xmcomp(i,1),xmcomp(i,2),xmcomp(i,3)
2154            write(367,*)xmcomp(i,4),xmcomp(i,5),xmcomp(i,6)
2155
2156            write(368,*)xlcomp(i,1),xlcomp(i,2),xlcomp(i,3)
2157            write(369,*)xlcomp(i,4),xlcomp(i,5),xlcomp(i,6)
2158
2159            write(370,*)xl1comp(i,1),xl1comp(i,2),xl1comp(i,3)
2160            write(371,*)xl1comp(i,4),xl1comp(i,5),xl1comp(i,6)
2161
2162            write(372,*)xl2comp(i,1),xl2comp(i,2),xl2comp(i,3)
2163            write(373,*)xl2comp(i,4),xl2comp(i,5),xl2comp(i,6)
2164
2165            write(374,*)xnd(i,1),xnd(i,2),scomp(i)
2166            write(375,*)ucomp(i,1),ucomp(i,2),ucomp(i,3)
2167            enddo
2168
2169            call flush(365)
2170            call flush(366)
2171            call flush(367)
2172            call flush(368)
2173            call flush(369)
2174            call flush(370)
2175            call flush(371)
2176            call flush(372)
2177            call flush(373)
2178            call flush(374)
2179            call flush(375)
2180
2181c            close(852)
2182c            close(853)
2183c            close(854)
2184
2185         endif
2186         endif
2187
2188            if (myrank .eq. master) then
2189               write(*,*)'uit uic=', ucomp(32,1),u1
2190            endif
2191
2192 555     format(e14.7,4(2x,e14.7))
2193 556     format(e14.7,5(2x,e14.7))
2194
2195
2196
2197
2198c $$$$$$$$$$$$$$$$$$$$$$$$$$$
2199      tmp1 =  MINVAL(cdelsq)
2200      tmp2 =  MAXVAL(cdelsq)
2201      if(numpe>1) then
2202         call MPI_REDUCE (tmp1, tmp3, 1,MPI_DOUBLE_PRECISION,
2203     &        MPI_MIN, master, MPI_COMM_WORLD, ierr)
2204         call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
2205     &        MPI_MAX, master, MPI_COMM_WORLD, ierr)
2206         tmp1=tmp3
2207         tmp2=tmp4
2208      endif
2209      if (myrank .EQ. master) then !print CDelta^2 range
2210         write(34,*)lstep,tmp1,tmp2
2211         call flush(34)
2212      endif
2213c $$$$$$$$$$$$$$$$$$$$$$$$$$$
2214
2215      if (myrank .eq. master) then
2216         write(*,*) 'cdelsq=', cdelsq(1),cdelsq(2)
2217         write(*,*) 'cdelsq=', cdelsq2(1),cdelsq2(2)
2218         write(22,*) lstep, cdelsq(1)
2219         call flush(22)
2220      endif
2221
2222      do iblk = 1,nelblk
2223         lcsyst = lcblk(3,iblk)
2224         iel  = lcblk(1,iblk)
2225         npro = lcblk(1,iblk+1) - iel
2226         lelCat = lcblk(2,iblk)
2227         inum  = iel + npro - 1
2228
2229         ngauss = nint(lcsyst)
2230
2231         call scatnu (mien(iblk)%p, strl(iel:inum,:),
2232     &        mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:))
2233      enddo
2234c     $$$$$$$$$$$$$$$$$$$$$$$$$$$
2235c$$$  tmp1 =  MINVAL(xmudmi)
2236c$$$  tmp2 =  MAXVAL(xmudmi)
2237c$$$  if(numpe>1) then
2238c$$$  call MPI_REDUCE (tmp1, tmp3, 1, MPI_DOUBLE_PRECISION,
2239c$$$  &                 MPI_MIN, master, MPI_COMM_WORLD, ierr)
2240c$$$  call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
2241c$$$  &                 MPI_MAX, master, MPI_COMM_WORLD, ierr)
2242c$$$      tmp1=tmp3
2243c$$$  tmp2=tmp4
2244c$$$  endif
2245c$$$  if (myrank .EQ. master) then
2246c$$$  write(35,*) lstep,tmp1,tmp2
2247c$$$  call flush(35)
2248c$$$  endif
2249c $$$$$$$$$$$$$$$$$$$$$$$$$$$
2250
2251c
2252c  if flag set, write a restart file with info (reuse xmij's memory)
2253c
2254      if(irs.eq.11) then
2255         lstep=999
2256         xmij(:,1)=xnum(:)
2257         xmij(:,2)=xden(:)
2258         xmij(:,3)=cdelsq(:)
2259         xmij(:,5)=xlij(:,4)    !leave M_{12} in 4 and put L_{12} here
2260         call restar('out ',xmij,xlij) !also dump all of L_{ij} in ac
2261         stop
2262      endif
2263c
2264c  local clipping moved to scatnu with the creation of mxmudmi pointers
2265c
2266c$$$      rmu=datmat(1,2,1)
2267c$$$      xmudmi=min(xmudmi,1000.0*rmu) !don't let it get larger than 1000 mu
2268c$$$      xmudmi=max(xmudmi, -rmu) ! don't let (xmudmi + mu) < 0
2269c      stop !uncomment to test dmod
2270c
2271
2272
2273c  write out the nodal values of xnut (estimate since we don't calc strain
2274c  there and must use the filtered strain).
2275c
2276
2277      if ((irs .ge. 1) .and. (mod(lstep, ntout) .eq. 0)) then
2278c
2279c  collect the average strain into xnude(2)
2280c
2281         xnude(:,2) = zero
2282         do i = 1,numnp
2283            xnude(ifath(i),2) = xnude(ifath(i),2) + strnrm(i)
2284         enddo
2285
2286         if(numpe .gt. 1) then
2287             call drvAllreduce(xnude(:,2), xnuder(:,2),nfath)
2288          else
2289             xnuder=xnude
2290          endif
2291c
2292c          nut= cdelsq    * |S|
2293c
2294         xnutf=xnuder(:,1)*xnuder(:,2)/nsons(:)
2295c
2296c  collect the x and y coords into xnude
2297c
2298         xnude = zero
2299         do i = 1,numnp
2300            xnude(ifath(i),1) = xnude(ifath(i),1) + x(i,1)
2301            xnude(ifath(i),2) = xnude(ifath(i),2) + x(i,2)
2302         enddo
2303
2304         if(numpe .gt. 1)
2305     &        call drvAllreduce(xnude, xnuder,2*nfath)
2306         xnuder(:,1)=xnuder(:,1)/nsons(:)
2307         xnuder(:,2)=xnuder(:,2)/nsons(:)
2308c
2309c  xnude is the sum of the sons for each father on this processor
2310c
2311         if((myrank.eq.master)) then
2312            do i=1,nfath      ! cdelsq   * |S|
2313               write(444,*) xnuder(i,1),xnuder(i,2),xnutf(i)
2314            enddo
2315            call flush(444)
2316         endif
2317      endif
2318
2319      return
2320      end
2321      subroutine widefdmc (y,      shgl,      shp,
2322     &                   iper,   ilwork,
2323     &                   nsons,  ifath,     x)
2324
2325      use pointer_data
2326
2327      use quadfilt   ! This module gives us shglf(maxtp,nsd,maxsh,ngaussf),
2328c                    shpf(maxtp,maxsh,ngaussf), and Qwtf(maxtp,ngaussf).
2329c                    Shpf and shglf are the shape funciotns and their
2330c                    gradient evaluated using the quadrature rule desired
2331c                    for computing the dmod. Qwtf contains the weights of the
2332c                    quad. points.
2333
2334      include "common.h"
2335      include "mpif.h"
2336      include "auxmpi.h"
2337
2338c
2339      dimension fres(nshg,33),         fwr(nshg),
2340     &          strnrm(nshg),         cdelsq(nshg),
2341     &          cdelsq2(nshg),
2342     &          xnum(nshg),           xden(nshg),
2343     &          xmij(nshg,6),         xlij(nshg,6),
2344     &          xnude(nfath,2),        xnuder(nfath,2),
2345     &          ynude(nfath,6),        ynuder(nfath,6),
2346     &          ui(nfath,3),           snorm(nfath),
2347     &          uir(nfath,3),          snormr(nfath)
2348      dimension xm(nfath,6),           xl(nfath,6),
2349     &          xl1(nfath,6),          xl2(nfath,6),
2350     &          xl1r(nfath,6),         xl2r(nfath,6),
2351     &          xmr(nfath,6),          xlr(nfath,6),
2352     &          nsons(nshg),
2353     &          strl(numel,ngauss),
2354     &          y(nshg,5),            yold(nshg,5),
2355     &          ifath(nshg),          iper(nshg),
2356     &          ilwork(nlwork),
2357     &          x(numnp,3)
2358      dimension shgl(MAXTOP,nsd,maxsh,MAXQPT), shp(MAXTOP,maxsh,MAXQPT),
2359     &          xnutf(nfath),
2360     &          hfres(nshg,22),
2361     &          xfac(nshg,5)
2362
2363      character*10 cname
2364      character*30 fname1, fname2, fname3, fname4, fname5, fname6
2365c
2366
2367c
2368c
2369c   setup the weights for time averaging of cdelsq (now in quadfilt module)
2370c
2371
2372      denom=max(1.0d0*(lstep),one)
2373      if(dtavei.lt.0) then
2374         wcur=one/denom
2375      else
2376         wcur=dtavei
2377      endif
2378      whist=1.0-wcur
2379
2380      if (myrank .eq. master) then
2381         write(*,*)'istep=', istep
2382      endif
2383
2384      if (istep .eq. 0) then
2385         xnd      = zero
2386         xmodcomp = zero
2387         xmcomp  = zero
2388         xlcomp  = zero
2389         xl1comp  = zero
2390         xl2comp  = zero
2391         ucomp    = zero
2392         scomp    = zero
2393      endif
2394
2395
2396      fres = zero
2397      hfres = zero
2398
2399      yold(:,1)=y(:,4)
2400      yold(:,2:4)=y(:,1:3)
2401
2402c
2403c  hack in an interesting velocity field (uncomment to test dmod)
2404c
2405c      yold(:,5) = 1.0  ! Debugging
2406c      yold(:,2) = 2.0*x(:,1) - 3.0*x(:,2)
2407c      yold(:,3) = 3.0*x(:,1) + 4.0*x(:,2)
2408c      yold(:,4) = 4.0*x(:,1) + x(:,2) + x(:,3)
2409c      yold(:,1) = Rgas * yold(:,5) ! Necessary to make model suitable
2410c                               suitable for the
2411
2412      do iblk = 1,nelblk
2413        lcsyst = lcblk(3,iblk)
2414        iel  = lcblk(1,iblk) !Element number where this block begins
2415        npro = lcblk(1,iblk+1) - iel
2416        lelCat = lcblk(2,iblk)
2417        nenl = lcblk(5,iblk)
2418        nshl = lcblk(10,iblk)
2419        inum  = iel + npro - 1
2420
2421        ngauss = nint(lcsyst)
2422        ngaussf = nintf(lcsyst)
2423
2424c        call hfilterBB (yold, x, mien(iblk)%p, hfres,
2425c     &               shglf(lcsyst,:,1:nshl,:),
2426c     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
2427
2428        call hfilterCC (yold, x, mien(iblk)%p, hfres,
2429     &               shglf(lcsyst,:,1:nshl,:),
2430     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
2431
2432      enddo
2433
2434      if(numpe>1) call commu (hfres, ilwork, 22, 'in ')
2435c
2436c... account for periodicity in filtered variables
2437c
2438      do j = 1,nshg  !    Add on-processor slave contribution to masters
2439        i = iper(j)
2440        if (i .ne. j) then
2441           hfres(i,:) = hfres(i,:) + hfres(j,:)
2442        endif
2443      enddo
2444      do j = 1,nshg ! Set on-processor slaves to be the same as masters
2445        i = iper(j)
2446        if (i .ne. j) then
2447           hfres(j,:) = hfres(i,:)
2448        endif
2449      enddo
2450
2451c... Set off-processor slaves to be the same as their masters
2452
2453      if(numpe>1)   call commu (hfres, ilwork, 22, 'out')
2454
2455
2456      hfres(:,16) = one / hfres(:,16) ! one/(volume filter kernel)
2457
2458      do j = 1, 15
2459	hfres(:,j) = hfres(:,j) * hfres(:,16)
2460      enddo
2461      do j = 17, 22
2462	hfres(:,j) = hfres(:,j) * hfres(:,16)
2463      enddo
2464
2465c... For debugging
2466
2467c      hfres(:,1) = 2.0*x(:,1) - 3.0*x(:,2)
2468c      hfres(:,2) = 3.0*x(:,1) + 4.0*x(:,2)
2469c      hfres(:,3) = 4.0*x(:,1) + x(:,2) + x(:,3)
2470
2471c... Done w/ h-filtering. Begin 2h-filtering.
2472
2473      do iblk = 1,nelblk
2474        lcsyst = lcblk(3,iblk)
2475        iel  = lcblk(1,iblk) !Element number where this block begins
2476        npro = lcblk(1,iblk+1) - iel
2477        lelCat = lcblk(2,iblk)
2478        nenl = lcblk(5,iblk)
2479        nshl = lcblk(10,iblk)
2480        inum  = iel + npro - 1
2481
2482        ngauss = nint(lcsyst)
2483        ngaussf = nintf(lcsyst)
2484
2485        call twohfilterBB (yold, x, strl(iel:inum,:), mien(iblk)%p,
2486     &               fres, hfres, shglf(lcsyst,:,1:nshl,:),
2487     &               shpf(lcsyst,1:nshl,:),Qwtf(lcsyst,1:ngaussf))
2488
2489      enddo
2490c
2491
2492
2493      if(numpe>1) call commu (fres, ilwork, 33, 'in ')
2494c
2495c account for periodicity in filtered variables
2496c
2497      do j = 1,nshg
2498        i = iper(j)
2499        if (i .ne. j) then
2500           fres(i,:) = fres(i,:) + fres(j,:)
2501        endif
2502      enddo
2503
2504      do j = 1,nshg
2505        i = iper(j)
2506        if (i .ne. j) then
2507           fres(j,:) = fres(i,:)
2508        endif
2509      enddo
2510
2511      if(numpe>1)then
2512         call commu (fres, ilwork, 33, 'out')
2513      endif
2514
2515      fres(:,22) = one / fres(:,22)
2516      do j = 1,21
2517        fres(:,j) = fres(:,j) * fres(:,22)
2518      enddo
2519      do j = 23,33
2520        fres(:,j) = fres(:,j) * fres(:,22)
2521      enddo
2522
2523
2524      do iblk = 1,nelblk
2525        lcsyst = lcblk(3,iblk)
2526        iel  = lcblk(1,iblk) !Element number where this block begins
2527        npro = lcblk(1,iblk+1) - iel
2528        lelCat = lcblk(2,iblk)
2529        nenl = lcblk(5,iblk)
2530        nshl = lcblk(10,iblk)
2531        inum  = iel + npro - 1
2532
2533        ngauss = nint(lcsyst)
2534
2535        call getstrl (yold, x,      mien(iblk)%p,
2536     &               strl(iel:inum,:), shgl(lcsyst,:,1:nshl,:),
2537     &               shp(lcsyst,1:nshl,:))
2538
2539      enddo
2540
2541c
2542c... Obtain the hat-tilde strain rate norm at the nodes
2543c
2544
2545      strnrm = sqrt(
2546     &  two * (fres(:,10)**2 + fres(:,11)**2 + fres(:,12)**2)
2547     &  + four * ( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
2548
2549      fwr = fwr1 * strnrm
2550
2551      xmij(:,1) = -fwr
2552     &             * fres(:,10) + fres(:,16)
2553      xmij(:,2) = -fwr
2554     &             * fres(:,11) + fres(:,17)
2555      xmij(:,3) = -fwr
2556     &             * fres(:,12) + fres(:,18)
2557
2558      xmij(:,4) = -fwr * fres(:,13) + fres(:,19)
2559      xmij(:,5) = -fwr * fres(:,14) + fres(:,20)
2560      xmij(:,6) = -fwr * fres(:,15) + fres(:,21)
2561
2562
2563      xlij(:,1) = fres(:,4) - fres(:,1) * fres(:,1)
2564      xlij(:,2) = fres(:,5) - fres(:,2) * fres(:,2)
2565      xlij(:,3) = fres(:,6) - fres(:,3) * fres(:,3)
2566      xlij(:,4) = fres(:,7) - fres(:,1) * fres(:,2)
2567      xlij(:,5) = fres(:,8) - fres(:,1) * fres(:,3)
2568      xlij(:,6) = fres(:,9) - fres(:,2) * fres(:,3)
2569
2570      xnum =        xlij(:,1) * xmij(:,1) + xlij(:,2) * xmij(:,2)
2571     &                                    + xlij(:,3) * xmij(:,3)
2572     &     + two * (xlij(:,4) * xmij(:,4) + xlij(:,5) * xmij(:,5)
2573     &                                    + xlij(:,6) * xmij(:,6))
2574      xden =        xmij(:,1) * xmij(:,1) + xmij(:,2) * xmij(:,2)
2575     &                                    + xmij(:,3) * xmij(:,3)
2576     &     + two * (xmij(:,4) * xmij(:,4) + xmij(:,5) * xmij(:,5)
2577     &                                    + xmij(:,6) * xmij(:,6))
2578      xden = two * xden
2579
2580c... For collectection of statistics on dyn. model components
2581
2582      xfac(:,1) = strnrm*strnrm*( fres(:,10)**2 + fres(:,11)**2 +
2583     &     fres(:,12)**2
2584     &     + two*( fres(:,13)**2 + fres(:,14)**2 + fres(:,15)**2 ) )
2585
2586      xfac(:,2) = strnrm*( xlij(:,1)*fres(:,10) + xlij(:,2)*fres(:,11)
2587     &     + xlij(:,3)*fres(:,12) +
2588     &     two*(xlij(:,4)*fres(:,13) + xlij(:,5)*fres(:,14) +
2589     &     xlij(:,6)*fres(:,15)) )
2590
2591      xfac(:,3) = strnrm*( fres(:,10)*fres(:,16) + fres(:,11)*fres(:,17)
2592     &     + fres(:,12)*fres(:,18) +
2593     &     two*(fres(:,13)*fres(:,19) + fres(:,14)*fres(:,20) +
2594     &     fres(:,15)*fres(:,21)) )
2595
2596      xfac(:,4) = xlij(:,1)*fres(:,16) + xlij(:,2)*fres(:,17)
2597     &     + xlij(:,3)*fres(:,18) +
2598     &     two*(xlij(:,4)*fres(:,19) + xlij(:,5)*fres(:,20) +
2599     &     xlij(:,6)*fres(:,21))
2600
2601      xfac(:,5) = fres(:,16)*fres(:,16) + fres(:,17)*fres(:,17)
2602     &     + fres(:,18)*fres(:,18) +
2603     &     two*(fres(:,19)*fres(:,19) + fres(:,20)*fres(:,20) +
2604     &     fres(:,21)*fres(:,21))
2605
2606c  zero on processor periodic nodes so that they will not be added twice
2607        do j = 1,numnp
2608          i = iper(j)
2609          if (i .ne. j) then
2610            xnum(j) = zero
2611            xden(j) = zero
2612            xfac(j,:) = zero
2613            xmij(j,:) = zero
2614            xlij(j,:) = zero
2615            fres(j,:) = zero
2616            strnrm(j) = zero
2617          endif
2618        enddo
2619
2620      if (numpe.gt.1) then
2621
2622         numtask = ilwork(1)
2623         itkbeg = 1
2624
2625c zero the nodes that are "solved" on the other processors
2626         do itask = 1, numtask
2627
2628            iacc   = ilwork (itkbeg + 2)
2629            numseg = ilwork (itkbeg + 4)
2630
2631            if (iacc .eq. 0) then
2632               do is = 1,numseg
2633                  isgbeg = ilwork (itkbeg + 3 + 2*is)
2634                  lenseg = ilwork (itkbeg + 4 + 2*is)
2635                  isgend = isgbeg + lenseg - 1
2636                  xnum(isgbeg:isgend) = zero
2637                  xden(isgbeg:isgend) = zero
2638                  strnrm(isgbeg:isgend) = zero
2639                  xfac(isgbeg:isgend,:) = zero
2640                  xmij(isgbeg:isgend,:) = zero
2641                  xlij(isgbeg:isgend,:) = zero
2642                  fres(isgbeg:isgend,:) = zero
2643               enddo
2644            endif
2645
2646            itkbeg = itkbeg + 4 + 2*numseg
2647
2648         enddo
2649
2650      endif
2651c
2652c Description of arrays.   Each processor has an array of length equal
2653c to the total number of fathers times 2 xnude(nfathers,2). One to collect
2654c the numerator and one to collect the denominator.  There is also an array
2655c of length nshg on each processor which tells the father number of each
2656c on processor node, ifath(nnshg).  Finally, there is an arry of length
2657c nfathers to tell the total (on all processors combined) number of sons
2658c for each father.
2659c
2660c  Now loop over nodes and accumlate the numerator and the denominator
2661c  to the father nodes.  Only on processor addition at this point.
2662c  Note that serrogate fathers are collect some for the case where some
2663c  sons are on another processor
2664c
2665      xnude = zero
2666      ynude = zero
2667      xm    = zero
2668      xl    = zero
2669      xl1   = zero
2670      xl2   = zero
2671      ui    = zero
2672      snorm = zero
2673
2674      do i = 1,nshg
2675         xnude(ifath(i),1) = xnude(ifath(i),1) + xnum(i)
2676         xnude(ifath(i),2) = xnude(ifath(i),2) + xden(i)
2677
2678         ynude(ifath(i),1) = ynude(ifath(i),1) + xfac(i,1)
2679         ynude(ifath(i),2) = ynude(ifath(i),2) + xfac(i,2)
2680         ynude(ifath(i),3) = ynude(ifath(i),3) + xfac(i,3)
2681         ynude(ifath(i),4) = ynude(ifath(i),4) + xfac(i,4)
2682         ynude(ifath(i),5) = ynude(ifath(i),5) + xfac(i,5)
2683
2684         xm(ifath(i),1) = xm(ifath(i),1) + xmij(i,1)
2685         xm(ifath(i),2) = xm(ifath(i),2) + xmij(i,2)
2686         xm(ifath(i),3) = xm(ifath(i),3) + xmij(i,3)
2687         xm(ifath(i),4) = xm(ifath(i),4) + xmij(i,4)
2688         xm(ifath(i),5) = xm(ifath(i),5) + xmij(i,5)
2689         xm(ifath(i),6) = xm(ifath(i),6) + xmij(i,6)
2690
2691         xl(ifath(i),1) = xl(ifath(i),1) + xlij(i,1)
2692         xl(ifath(i),2) = xl(ifath(i),2) + xlij(i,2)
2693         xl(ifath(i),3) = xl(ifath(i),3) + xlij(i,3)
2694         xl(ifath(i),4) = xl(ifath(i),4) + xlij(i,4)
2695         xl(ifath(i),5) = xl(ifath(i),5) + xlij(i,5)
2696         xl(ifath(i),6) = xl(ifath(i),6) + xlij(i,6)
2697
2698         xl1(ifath(i),1) = xl1(ifath(i),1) + fres(i,4)
2699         xl1(ifath(i),2) = xl1(ifath(i),2) + fres(i,5)
2700         xl1(ifath(i),3) = xl1(ifath(i),3) + fres(i,6)
2701         xl1(ifath(i),4) = xl1(ifath(i),4) + fres(i,7)
2702         xl1(ifath(i),5) = xl1(ifath(i),5) + fres(i,8)
2703         xl1(ifath(i),6) = xl1(ifath(i),6) + fres(i,9)
2704
2705         xl2(ifath(i),1) = xl2(ifath(i),1) + fres(i,1)*fres(i,1)
2706         xl2(ifath(i),2) = xl2(ifath(i),2) + fres(i,2)*fres(i,2)
2707         xl2(ifath(i),3) = xl2(ifath(i),3) + fres(i,3)*fres(i,3)
2708         xl2(ifath(i),4) = xl2(ifath(i),4) + fres(i,1)*fres(i,2)
2709         xl2(ifath(i),5) = xl2(ifath(i),5) + fres(i,1)*fres(i,3)
2710         xl2(ifath(i),6) = xl2(ifath(i),6) + fres(i,2)*fres(i,3)
2711
2712         ui(ifath(i),1) = ui(ifath(i),1) + fres(i,1)
2713         ui(ifath(i),2) = ui(ifath(i),2) + fres(i,2)
2714         ui(ifath(i),3) = ui(ifath(i),3) + fres(i,3)
2715
2716         snorm(ifath(i)) = snorm(ifath(i)) + strnrm(i)
2717
2718      enddo
2719
2720c
2721c Now  the true fathers and serrogates combine results and update
2722c each other.
2723c
2724      if(numpe .gt. 1)then
2725         call drvAllreduce(xnude, xnuder,2*nfath)
2726         call drvAllreduce(ynude, ynuder,6*nfath)
2727         call drvAllreduce(xm, xmr,6*nfath)
2728         call drvAllreduce(xl, xlr,6*nfath)
2729         call drvAllreduce(xl1, xl1r,6*nfath)
2730         call drvAllreduce(xl2, xl2r,6*nfath)
2731         call drvAllreduce(ui, uir,3*nfath)
2732         call drvAllreduce(snorm, snormr,nfath)
2733
2734         do i = 1, nfath
2735            ynuder(i,6) = ( ynuder(i,4) - fwr1*ynuder(i,2) ) /
2736     &           ( two*ynuder(i,5) - four*fwr1*ynuder(i,3)
2737     &           + two*fwr1*fwr1*ynuder(i,1) )
2738         enddo
2739
2740         cdelsq2(:) = ynuder(ifath(:),6)  ! For comparison w/ cdelsq
2741c
2742c  xnude is the sum of the sons for each father on this processor
2743c
2744c  xnuder is the sum of the sons for each father on all processor combined
2745c  (the same as if we had not partitioned the mesh for each processor)
2746c
2747c   For each father we have precomputed the number of sons (including
2748c   the sons off processor).
2749c
2750c   Now divide by number of sons to get the average (not really necessary
2751c   for dynamic model since ratio will cancel nsons at each father)
2752c
2753         xnuder(:,1) = xnuder(:,1) / nsons(:)
2754         xnuder(:,2) = xnuder(:,2) / nsons(:)
2755
2756         do m = 1, 5
2757         ynuder(:,m) = ynuder(:,m)/nsons(:)
2758         enddo
2759         do m = 1,6
2760         xmr(:,m) = xmr(:,m)/nsons(:)
2761         xlr(:,m) = xlr(:,m)/nsons(:)
2762         xl1r(:,m) = xl1r(:,m)/nsons(:)
2763         xl2r(:,m) = xl2r(:,m)/nsons(:)
2764         enddo
2765
2766         uir(:,1) = uir(:,1)/nsons(:)
2767         uir(:,2) = uir(:,2)/nsons(:)
2768         uir(:,3) = uir(:,3)/nsons(:)
2769
2770         snormr(:) = snormr(:)/nsons(:)
2771
2772c
2773cc  the next line is c \Delta^2
2774cc
2775cc         xnuder(:,1) = xnuder(:,1) / (xnuder(:,2) + 1.d-09)
2776cc         do i = 1,nshg
2777cc            cdelsq(i) = xnuder(ifath(i),1)
2778cc         enddo
2779
2780            numNden(:,1) = whist*numNden(:,1)+wcur*xnuder(ifath(:),1)
2781            numNden(:,2) = whist*numNden(:,2)+wcur*xnuder(ifath(:),2)
2782            cdelsq(:) = numNden(:,1) / (numNden(:,2) + 1.d-09)
2783
2784c            cdelsq(:) = xnuder(ifath(:),1)/(xnuder(ifath(:),2)+1.d-09)
2785
2786            xnd(:,1) = xnd(:,1) + xnuder(:,1)
2787            xnd(:,2) = xnd(:,2) + xnuder(:,2)
2788
2789            xmodcomp(:,1) = xmodcomp(:,1)+ynuder(:,1)
2790            xmodcomp(:,2) = xmodcomp(:,2)+ynuder(:,2)
2791            xmodcomp(:,3) = xmodcomp(:,3)+ynuder(:,3)
2792            xmodcomp(:,4) = xmodcomp(:,4)+ynuder(:,4)
2793            xmodcomp(:,5) = xmodcomp(:,5)+ynuder(:,5)
2794
2795            xmcomp(:,:) = xmcomp(:,:)+xmr(:,:)
2796            xlcomp(:,:) = xlcomp(:,:)+xlr(:,:)
2797
2798            xl1comp(:,:) = xl1comp(:,:)+xl1r(:,:)
2799            xl2comp(:,:) = xl2comp(:,:)+xl2r(:,:)
2800
2801            ucomp(:,:) = ucomp(:,:)+uir(:,:)
2802            u1 = uir(32,1)
2803            scomp(:)   = scomp(:)+snormr(:)
2804
2805      else
2806
2807         xnude(:,1) = xnude(:,1)/nsons(:)
2808         xnude(:,2) = xnude(:,2)/nsons(:)
2809
2810         do m = 1, 5
2811         ynude(:,m) = ynude(:,m)/nsons(:)
2812         enddo
2813         do m = 1,6
2814         xm(:,m) = xm(:,m)/nsons(:)
2815         xl(:,m) = xl(:,m)/nsons(:)
2816         xl1(:,m) = xl1(:,m)/nsons(:)
2817         xl2(:,m) = xl2(:,m)/nsons(:)
2818         enddo
2819
2820         ui(:,1) = ui(:,1)/nsons(:)
2821         ui(:,2) = ui(:,2)/nsons(:)
2822         ui(:,3) = ui(:,3)/nsons(:)
2823
2824         snorm(:) = snorm(:)/nsons(:)
2825c
2826c     the next line is c \Delta^2, not nu_T but we want to save the
2827c     memory
2828c
2829
2830cc         xnude(:,1) = xnude(:,1) / (xnude(:,2) + 1.d-09)
2831cc        do i = 1,nshg
2832cc            cdelsq(i) = xnude(ifath(i),1)
2833cc         enddo
2834cc      endif
2835
2836         do i = 1, nfath
2837            ynude(i,6) = ( ynude(i,4) - fwr1*ynude(i,2) ) /
2838     &           ( two*ynude(i,5) - four*fwr1*ynude(i,3)
2839     &           + fwr1*fwr1*ynude(i,1) )
2840         enddo
2841
2842            numNden(:,1) = whist*numNden(:,1)+wcur*xnude(ifath(:),1)
2843            numNden(:,2) = whist*numNden(:,2)+wcur*xnude(ifath(:),2)
2844
2845            xnd(:,1) = xnd(:,1)+xnude(:,1)
2846            xnd(:,2) = xnd(:,2)+xnude(:,2)
2847
2848            cdelsq(:) = numNden(:,1) / (numNden(:,2)) ! + 1.d-09)
2849
2850c            cdelsq(:) = xnude(ifath(:),1)/(xnude(ifath(:),2))!+1.d-09)
2851
2852
2853          cdelsq2(:) = ynude(ifath(:),6)  ! For comparison w/ cdelsq
2854
2855            xmodcomp(:,1) = xmodcomp(:,1)+ynude(:,1)
2856            xmodcomp(:,2) = xmodcomp(:,2)+ynude(:,2)
2857            xmodcomp(:,3) = xmodcomp(:,3)+ynude(:,3)
2858            xmodcomp(:,4) = xmodcomp(:,4)+ynude(:,4)
2859            xmodcomp(:,5) = xmodcomp(:,5)+ynude(:,5)
2860
2861            xmcomp(:,:) = xmcomp(:,:)+xm(:,:)
2862            xlcomp(:,:) = xlcomp(:,:)+xl(:,:)
2863
2864            xl1comp(:,:) = xl1comp(:,:)+xl1(:,:)
2865            xl2comp(:,:) = xl2comp(:,:)+xl2(:,:)
2866
2867            ucomp(:,:) = ucomp(:,:)+ui(:,:)
2868            u1 = ui(32,1)
2869            scomp(:)   = scomp(:)+snorm(:)
2870
2871         endif
2872
2873
2874c         do i = 1, nfath
2875c            xmodcomp(i,:) = xmodcomp(i,:)/nsons(i)
2876c            xmcomp(i,:) = xmcomp(i,:)/nsons(i)
2877c            xlcomp(i,:) = xlcomp(i,:)/nsons(i)
2878c            xl2comp(i,:) = xl2comp(i,:)/nsons(i)
2879c            xl1comp(i,:) = xl1comp(i,:)/nsons(i)
2880c            xnd(i,:) = xnd(i,:)/nsons(i)
2881c            scomp(i) = scomp(i)/nsons(i)
2882c            ucomp(i,:) = ucomp(i,:)/nsons(i)
2883c         enddo
2884
2885         if ( istep .eq. (nstep(1)-1) ) then
2886         if ( myrank .eq. master) then
2887
2888            do i = 1, nfath
2889            write(365,*)xmodcomp(i,1),xmodcomp(i,2),xmodcomp(i,3),
2890     &              xmodcomp(i,4),xmodcomp(i,5)
2891
2892            write(366,*)xmcomp(i,1),xmcomp(i,2),xmcomp(i,3)
2893            write(367,*)xmcomp(i,4),xmcomp(i,5),xmcomp(i,6)
2894
2895            write(368,*)xlcomp(i,1),xlcomp(i,2),xlcomp(i,3)
2896            write(369,*)xlcomp(i,4),xlcomp(i,5),xlcomp(i,6)
2897
2898            write(370,*)xl1comp(i,1),xl1comp(i,2),xl1comp(i,3)
2899            write(371,*)xl1comp(i,4),xl1comp(i,5),xl1comp(i,6)
2900
2901            write(372,*)xl2comp(i,1),xl2comp(i,2),xl2comp(i,3)
2902            write(373,*)xl2comp(i,4),xl2comp(i,5),xl2comp(i,6)
2903
2904            write(374,*)xnd(i,1),xnd(i,2),scomp(i)
2905            write(375,*)ucomp(i,1),ucomp(i,2),ucomp(i,3)
2906
2907c            write(*,*)'uit uic=', ucomp(32,1),u1
2908            enddo
2909
2910
2911            call flush(365)
2912            call flush(366)
2913            call flush(367)
2914            call flush(368)
2915            call flush(369)
2916            call flush(370)
2917            call flush(371)
2918            call flush(372)
2919            call flush(373)
2920            call flush(374)
2921            call flush(375)
2922
2923c            if (myrank .eq. master) then
2924c               write(*,*)'uit uic=', ucomp(32,1),u1
2925c            endif
2926
2927
2928c            close(852)
2929c            close(853)
2930c            close(854)
2931
2932         endif
2933         endif
2934
2935            if (myrank .eq. master) then
2936               write(*,*)'uit uic=', ucomp(32,1),u1
2937            endif
2938
2939
2940 555     format(e14.7,4(2x,e14.7))
2941 556     format(e14.7,5(2x,e14.7))
2942
2943c         close(849)
2944c         close(850)
2945c         close(851)
2946c         close(852)
2947c         close(853)
2948c         close(854)
2949
2950c $$$$$$$$$$$$$$$$$$$$$$$$$$$
2951      tmp1 =  MINVAL(cdelsq)
2952      tmp2 =  MAXVAL(cdelsq)
2953      if(numpe>1) then
2954         call MPI_REDUCE (tmp1, tmp3, 1,MPI_DOUBLE_PRECISION,
2955     &        MPI_MIN, master, MPI_COMM_WORLD, ierr)
2956         call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
2957     &        MPI_MAX, master, MPI_COMM_WORLD, ierr)
2958         tmp1=tmp3
2959         tmp2=tmp4
2960      endif
2961      if (myrank .EQ. master) then !print CDelta^2 range
2962         write(34,*)lstep,tmp1,tmp2
2963         call flush(34)
2964      endif
2965c $$$$$$$$$$$$$$$$$$$$$$$$$$$
2966
2967      if (myrank .eq. master) then
2968         write(*,*) 'cdelsq=', cdelsq(1),cdelsq(2)
2969         write(*,*) 'cdelsq=', cdelsq2(1),cdelsq2(2)
2970         write(22,*) lstep, cdelsq(1)
2971         call flush(22)
2972      endif
2973
2974      do iblk = 1,nelblk
2975         lcsyst = lcblk(3,iblk)
2976         iel  = lcblk(1,iblk)
2977         npro = lcblk(1,iblk+1) - iel
2978         lelCat = lcblk(2,iblk)
2979         inum  = iel + npro - 1
2980
2981         ngauss = nint(lcsyst)
2982
2983         call scatnu (mien(iblk)%p, strl(iel:inum,:),
2984     &        mxmudmi(iblk)%p,cdelsq,shp(lcsyst,1:nshl,:))
2985      enddo
2986c     $$$$$$$$$$$$$$$$$$$$$$$$$$$
2987c$$$  tmp1 =  MINVAL(xmudmi)
2988c$$$  tmp2 =  MAXVAL(xmudmi)
2989c$$$  if(numpe>1) then
2990c$$$  call MPI_REDUCE (tmp1, tmp3, 1, MPI_DOUBLE_PRECISION,
2991c$$$  &                 MPI_MIN, master, MPI_COMM_WORLD, ierr)
2992c$$$  call MPI_REDUCE (tmp2, tmp4, 1, MPI_DOUBLE_PRECISION,
2993c$$$  &                 MPI_MAX, master, MPI_COMM_WORLD, ierr)
2994c$$$      tmp1=tmp3
2995c$$$  tmp2=tmp4
2996c$$$  endif
2997c$$$  if (myrank .EQ. master) then
2998c$$$  write(35,*) lstep,tmp1,tmp2
2999c$$$  call flush(35)
3000c$$$  endif
3001c $$$$$$$$$$$$$$$$$$$$$$$$$$$
3002
3003c
3004c  if flag set, write a restart file with info (reuse xmij's memory)
3005c
3006      if(irs.eq.11) then
3007         lstep=999
3008         xmij(:,1)=xnum(:)
3009         xmij(:,2)=xden(:)
3010         xmij(:,3)=cdelsq(:)
3011         xmij(:,5)=xlij(:,4)    !leave M_{12} in 4 and put L_{12} here
3012         call restar('out ',xmij,xlij) !also dump all of L_{ij} in ac
3013         stop
3014      endif
3015c
3016c  local clipping moved to scatnu with the creation of mxmudmi pointers
3017c
3018c$$$      rmu=datmat(1,2,1)
3019c$$$      xmudmi=min(xmudmi,1000.0*rmu) !don't let it get larger than 1000 mu
3020c$$$      xmudmi=max(xmudmi, -rmu) ! don't let (xmudmi + mu) < 0
3021c      stop !uncomment to test dmod
3022c
3023
3024
3025c  write out the nodal values of xnut (estimate since we don't calc strain
3026c  there and must use the filtered strain).
3027c
3028
3029      if ((irs .ge. 1) .and. (mod(lstep, ntout) .eq. 0)) then
3030c
3031c  collect the average strain into xnude(2)
3032c
3033         xnude(:,2) = zero
3034         do i = 1,numnp
3035            xnude(ifath(i),2) = xnude(ifath(i),2) + strnrm(i)
3036         enddo
3037
3038         if(numpe .gt. 1) then
3039             call drvAllreduce(xnude(:,2), xnuder(:,2),nfath)
3040          else
3041             xnuder=xnude
3042          endif
3043c
3044c          nut= cdelsq    * |S|
3045c
3046         xnutf=xnuder(:,1)*xnuder(:,2)/nsons(:)
3047c
3048c  collect the x and y coords into xnude
3049c
3050         xnude = zero
3051         do i = 1,numnp
3052            xnude(ifath(i),1) = xnude(ifath(i),1) + x(i,1)
3053            xnude(ifath(i),2) = xnude(ifath(i),2) + x(i,2)
3054         enddo
3055
3056         if(numpe .gt. 1)
3057     &        call drvAllreduce(xnude, xnuder,2*nfath)
3058         xnuder(:,1)=xnuder(:,1)/nsons(:)
3059         xnuder(:,2)=xnuder(:,2)/nsons(:)
3060c
3061c  xnude is the sum of the sons for each father on this processor
3062c
3063         if((myrank.eq.master)) then
3064            do i=1,nfath      ! cdelsq   * |S|
3065               write(444,*) xnuder(i,1),xnuder(i,2),xnutf(i)
3066            enddo
3067            call flush(444)
3068         endif
3069      endif
3070
3071      return
3072      end
3073