xref: /petsc/src/sys/tests/ex69f.F90 (revision d7c1f4409a34685d8dcd545a97d161d483d89f66)
program ex69F90

!   Demonstrates two issues
!
!   A) How using mpiexec to start up a program can dramatically change
!      the OpenMP thread binding/mapping, resulting in poor performance
!
!      Set the environment variable with, for example,
!        export OMP_NUM_THREADS=4
!      Run this example on one MPI process three ways
!        ./ex69f
!        mpiexec -n 1 ./ex69f
!        mpiexec --bind-to numa -n 1 ./ex69f
!
!      You may get very different wall clock times.
!      It seems some mpiexec implementations change the thread binding/mapping used by
!      OpenMP so that all the threads are run on a single core
!
!      The same differences occur without the PetscInitialize() call, indicating
!      the binding change is done by mpiexec, not by MPI_Init()
!
!   B) How cpu_time() may give unexpected results, much larger than expected,
!      even for code portions with no OpenMP
!
!      Note the CPU time reported for the second loop: it should equal the wall clock time
!      since the loop is not run in parallel (with OpenMP), but instead it may be listed as
!      many times higher
!
!     $ OMP_NUM_THREADS=8 ./ex69f (ifort compiler)
!       CPU time reported by cpu_time()              1.66649300000000
!       Wall clock time reported by system_clock()   0.273980000000000
!       Wall clock time reported by omp_get_wtime()  0.273979902267456
!
#include <petsc/finclude/petscsys.h>
  use petsc
  implicit none

  PetscErrorCode ierr
! Start/end stamps for the three clocks being compared; omp_get_wtime is declared
! here as an external double precision function
  double precision cputime_start,cputime_end,wtime_start,wtime_end,omp_get_wtime
  integer(kind = 8) systime_start,systime_end,systime_rate
! Work array filled by both timed loops
  double precision x(100)
  integer i,maxthreads,omp_get_max_threads

  PetscCallA(PetscInitialize(ierr))

! Give maxthreads a defined value: the only other assignment sits on a
! conditional-compilation line, while the print that follows it is unconditional
  maxthreads = 1

! ---- First measurement: loop inside an OpenMP parallel region ----
  call system_clock(systime_start,systime_rate)
  wtime_start = omp_get_wtime()
  call cpu_time(cputime_start)
!$OMP PARALLEL DO
  do i=1,100
    x(i) = exp(3.0d0*i)
  enddo
!$OMP END PARALLEL DO
  call cpu_time(cputime_end)
  call system_clock(systime_end,systime_rate)
  wtime_end = omp_get_wtime()
  print*,'CPU time reported by cpu_time()            ', cputime_end - cputime_start
! system_clock() returns counts; divide by the count rate to get seconds
  print*,'Wall clock time reported by system_clock() ',real(systime_end - systime_start,kind=8)/real(systime_rate,kind=8)
  print*,'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
! Printing an element of x uses the result of the loop
  print*,'Value of x(22)',x(22)
!$  maxthreads = omp_get_max_threads()
  print*,'Number of threads set',maxthreads

! ---- Second measurement: the same loop with no OpenMP directive ----
  call system_clock(systime_start,systime_rate)
  wtime_start = omp_get_wtime()
  call cpu_time(cputime_start)
  do i=1,100
    x(i) = exp(3.0d0*i)
  enddo
  call cpu_time(cputime_end)
  call system_clock(systime_end,systime_rate)
  wtime_end = omp_get_wtime()
  print*,'CPU time reported by cpu_time()            ', cputime_end - cputime_start
  print*,'Wall clock time reported by system_clock() ',real(systime_end - systime_start,kind=8)/real(systime_rate,kind=8)
  print*,'Wall clock time reported by omp_get_wtime()', wtime_end - wtime_start
  print*,'Value of x(22)',x(22)
  PetscCallA(PetscFinalize(ierr))
end program ex69F90
779f0612e4SBarry Smith
!/*TEST
!
!   build:
!     requires: openmp
!
!   test:
!     filter: grep -v "Number of threads"
!
!TEST*/
87