xref: /petsc/src/sys/tests/ex69f.F90 (revision 9f0612e409f6220a780be6348417bea34ef34962)
1*9f0612e4SBarry Smith    program ex69F90
2*9f0612e4SBarry Smith
3*9f0612e4SBarry Smith!   Demonstrates two issues
4*9f0612e4SBarry Smith!
5*9f0612e4SBarry Smith!   A) How using mpiexec to start up a program can dramatically change
6*9f0612e4SBarry Smith!      the OpenMP thread binding/mapping resulting in poor performance
7*9f0612e4SBarry Smith!
8*9f0612e4SBarry Smith!      Set the environment variable with, for example,
9*9f0612e4SBarry Smith!        export OMP_NUM_THREADS=4
10*9f0612e4SBarry Smith!      Run this example on one MPI process three ways
11*9f0612e4SBarry Smith!        ./ex69f
12*9f0612e4SBarry Smith!        mpiexec -n 1 ./ex69f
13*9f0612e4SBarry Smith!        mpiexec --bind-to numa -n 1 ./ex69f
14*9f0612e4SBarry Smith!
15*9f0612e4SBarry Smith!      You may get very different wall clock times
16*9f0612e4SBarry Smith!      It seems some mpiexec implementations change the thread binding/mapping that results with
17*9f0612e4SBarry Smith!      OpenMP so all the threads are run on a single core
18*9f0612e4SBarry Smith!
19*9f0612e4SBarry Smith!      The same differences occur without the PetscInitialize() call indicating
20*9f0612e4SBarry Smith!      the binding change is done by the mpiexec, not the MPI_Init()
21*9f0612e4SBarry Smith!
22*9f0612e4SBarry Smith!   B) How cpu_time() may give unexpected results, much larger than expected,
23*9f0612e4SBarry Smith!      even for code portions with no OpenMP
24*9f0612e4SBarry Smith!
25*9f0612e4SBarry Smith!      Note the CPU time output for the second loop; it should equal the wallclock time
26*9f0612e4SBarry Smith!      since the loop is not run in parallel (with OpenMP) but instead it may be listed as
27*9f0612e4SBarry Smith!      many times higher
28*9f0612e4SBarry Smith!
29*9f0612e4SBarry Smith!     $ OMP_NUM_THREADS=8 ./ex69f (ifort compiler)
30*9f0612e4SBarry Smith!       CPU time reported by cpu_time()              1.66649300000000
31*9f0612e4SBarry Smith!       Wall clock time reported by system_clock()   0.273980000000000
32*9f0612e4SBarry Smith!       Wall clock time reported by omp_get_wtime()  0.273979902267456
33*9f0612e4SBarry Smith!
34*9f0612e4SBarry Smith#include <petsc/finclude/petscsys.h>
35*9f0612e4SBarry Smith    use petsc
36*9f0612e4SBarry Smith    implicit none
37*9f0612e4SBarry Smith!   ierr receives the PETSc error code; the start/end pairs hold samples from each of the three timers
38*9f0612e4SBarry Smith    PetscErrorCode ierr
39*9f0612e4SBarry Smith    double precision cputime_start,cputime_end,wtime_start,wtime_end,omp_get_wtime  ! omp_get_wtime: function result type
40*9f0612e4SBarry Smith    integer(kind = 8) systime_start,systime_end,systime_rate
41*9f0612e4SBarry Smith    double precision x(100)
42*9f0612e4SBarry Smith    integer i,maxthreads,omp_get_max_threads  ! omp_get_max_threads: function result type
43*9f0612e4SBarry Smith!   First measurement: loop under an OpenMP PARALLEL DO directive, bracketed by all three timers
44*9f0612e4SBarry Smith    PetscCallA(PetscInitialize(ierr))
45*9f0612e4SBarry Smith    call system_clock(systime_start,systime_rate)  ! second argument receives the clock rate in counts per second
46*9f0612e4SBarry Smith    wtime_start = omp_get_wtime()
47*9f0612e4SBarry Smith    call cpu_time(cputime_start)
48*9f0612e4SBarry Smith!$OMP PARALLEL DO
49*9f0612e4SBarry Smith    do i=1,100
50*9f0612e4SBarry Smith      x(i) = exp(3.0d0*i)
51*9f0612e4SBarry Smith    enddo
52*9f0612e4SBarry Smith    call cpu_time(cputime_end)
53*9f0612e4SBarry Smith    call system_clock(systime_end,systime_rate)
54*9f0612e4SBarry Smith    wtime_end = omp_get_wtime()
55*9f0612e4SBarry Smith    print*,'CPU time reported by cpu_time()            ', cputime_end - cputime_start
56*9f0612e4SBarry Smith    print*,'Wall clock time reported by system_clock() ',real(systime_end - systime_start,kind=8)/real(systime_rate,kind=8)
57*9f0612e4SBarry Smith    print*,'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
58*9f0612e4SBarry Smith    print*,'Value of x(22)',x(22)
59*9f0612e4SBarry Smith!$  maxthreads = omp_get_max_threads()
60*9f0612e4SBarry Smith    print*,'Number of threads set',maxthreads  ! assigned only by the OpenMP conditional line above; the test filter drops this line
61*9f0612e4SBarry Smith!   Second measurement: identical loop without any OpenMP directive, bracketed by the same three timers
62*9f0612e4SBarry Smith    call system_clock(systime_start,systime_rate)
63*9f0612e4SBarry Smith    wtime_start = omp_get_wtime()
64*9f0612e4SBarry Smith    call cpu_time(cputime_start)
65*9f0612e4SBarry Smith    do i=1,100
66*9f0612e4SBarry Smith      x(i) = exp(3.0d0*i)
67*9f0612e4SBarry Smith    enddo
68*9f0612e4SBarry Smith    call cpu_time(cputime_end)
69*9f0612e4SBarry Smith    call system_clock(systime_end,systime_rate)
70*9f0612e4SBarry Smith    wtime_end = omp_get_wtime()
71*9f0612e4SBarry Smith    print*,'CPU time reported by cpu_time()            ', cputime_end - cputime_start
72*9f0612e4SBarry Smith    print*,'Wall clock time reported by system_clock() ',real(systime_end - systime_start,kind=8)/real(systime_rate,kind=8)
73*9f0612e4SBarry Smith    print*,'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
74*9f0612e4SBarry Smith    print*,'Value of x(22)',x(22)
75*9f0612e4SBarry Smith    PetscCallA(PetscFinalize(ierr))
76*9f0612e4SBarry Smithend program ex69F90
77*9f0612e4SBarry Smith
78*9f0612e4SBarry Smith!/*TEST
79*9f0612e4SBarry Smith!
80*9f0612e4SBarry Smith!   build:
81*9f0612e4SBarry Smith!     requires: openmp
82*9f0612e4SBarry Smith!
83*9f0612e4SBarry Smith!   test:
84*9f0612e4SBarry Smith!     filter: grep -v "Number of threads"
85*9f0612e4SBarry Smith!
86*9f0612e4SBarry Smith!TEST*/
87