# xref: /petsc/config/petsc_harness.sh (revision badd099fb2ece77d080fc02aefe95d4a02e75697)


# Name of the invoking test script, and the per-test run directory derived
# from it (the script name with its .sh suffix removed).
scriptname=$(basename -- "$0")
rundir=${scriptname%.sh}
# Default timeout in seconds; can be overridden with -t.
TIMEOUT=60

# Always work from the directory that holds the test script.  The old guard
# (test "$PWD"!=`dirname $0`) was a single-word test and therefore always
# true, so the cd is made unconditional and abspath_scriptdir is always set.
cd "$(dirname -- "$0")" || exit 1
abspath_scriptdir=$PWD

# Remove stale outputs of a previous run, but only when rundir is a real,
# non-empty directory name.
if test -n "${rundir}" && test -d "${rundir}"; then
  rm -f "${rundir}"/*.tmp "${rundir}"/*.err "${rundir}"/*.out
fi
mkdir -p "${rundir}"

# Copy any auxiliary files the test needs, preserving their relative layout.
# ${runfiles} is deliberately unquoted: it is a whitespace-separated list.
if test -n "${runfiles}"; then
  for runfile in ${runfiles}; do
    subdir=$(dirname -- "${runfile}")
    mkdir -p "${rundir}/${subdir}"
    cp -r "${runfile}" "${rundir}/${subdir}"
  done
fi
cd "${rundir}" || exit 1

#
# Print the general and script-specific options to stderr, then exit.
#
# Arguments: $1 - exit status (optional, defaults to 0)
# Globals:   TIMEOUT (read; shown as the default timeout)
# Hook:      if a function named extrausage is defined, it is called after
#            the generic text so individual scripts can add their own help.
#
print_usage() {

cat >&2 <<EOF
Usage: $0 [options]

OPTIONS
  -a <args> ......... Override default arguments
  -c ................ Cleanup (remove generated files)
  -C ................ Compile
  -d ................ Launch in debugger
  -e <args> ......... Add extra arguments to default
  -E <args> ......... Add final arguments to default
  -f ................ force attempt to run test that would otherwise be skipped
  -h ................ help: print this message
  -n <integer> ...... Override the number of processors to use
  -j ................ Pass -j to petscdiff (just use diff)
  -J <arg> .......... Pass -J to petscdiff (just use diff with arg)
  -m ................ Update results using petscdiff
  -M ................ Update alt files using petscdiff
  -o <arg> .......... Output format: 'interactive', 'err_only'
  -p ................ Print command:  Print first command and exit
  -t <sec> .......... Override the default timeout (default=$TIMEOUT sec)
  -U ................ run cUda-memcheck
  -V ................ run Valgrind
  -v ................ Verbose: Print commands
EOF

  if declare -f extrausage > /dev/null; then extrausage; fi
  # Default to 0 so that a plain "print_usage" (as used for -h) does not
  # inherit whatever status the extrausage hook happened to return.
  exit "${1:-0}"
}
###
##  Arguments for overriding things
#
# Defaults for everything the command line can change.  The boolean-like
# settings hold the command names "true"/"false" and are later executed
# directly in "if $flag; then" tests.
output_fmt="interactive"
verbose=false
cleanup=false
compile=false
debugger=false
printcmd=false
mpiexec_function=false
force=false
diff_flags=""

while getopts "a:cCde:E:fhjJ:mMn:o:pt:UvV" arg; do
  case "${arg}" in
    a) args="${OPTARG}" ;;
    c) cleanup=true ;;
    C) compile=true ;;
    d) debugger=true ;;
    e) extra_args="${OPTARG}" ;;
    E) final_args="${OPTARG}" ;;
    f) force=true ;;
    h) print_usage 0; exit ;;
    n) nsize="${OPTARG}" ;;
    j) diff_flags="${diff_flags} -j" ;;
    J) diff_flags="${diff_flags} -J ${OPTARG}" ;;
    m) diff_flags="${diff_flags} -m" ;;
    M) diff_flags="${diff_flags} -M" ;;
    o) output_fmt=${OPTARG} ;;
    p) printcmd=true ;;
    t) TIMEOUT=${OPTARG} ;;
    U)
      # Wrap the launcher in the memcheck shell function defined in this file.
      mpiexec="petsc_mpiexec_cudamemcheck ${mpiexec}"
      mpiexec_function=true
      ;;
    V)
      # Wrap the launcher in the valgrind shell function defined in this file.
      mpiexec="petsc_mpiexec_valgrind ${mpiexec}"
      mpiexec_function=true
      ;;
    v) verbose=true ;;
    *)
      # Unknown option or missing argument: getopts has already printed a
      # diagnostic and set arg to '?'.  The former "eval $arg=found" could
      # only ever run the bogus command "?=found", so it is dropped; parsing
      # simply continues, as before.
      :
      ;;
  esac
done
shift $(( OPTIND - 1 ))

# Individual tests can extend the default timeout by setting timeoutfactor
# before this point.  Both operands get a default so that a missing
# timeoutfactor no longer silently produces a timeout of 0.
export MPIEXEC_TIMEOUT=$(( ${TIMEOUT:-60} * ${timeoutfactor:-1} ))
STARTTIME=$(date +%s)

# Assemble the final argument string: extra args go in front of the
# defaults, final args after them, and the debugger flag in front of all.
if test -n "$extra_args"; then
  args="$extra_args $args"
fi
if test -n "$final_args"; then
  args="$args $final_args"
fi
# ":-false" keeps an unset flag from being treated as true (an empty command
# succeeds).
if ${debugger:-false}; then
  args="-start_in_debugger $args"
fi

# Forward the filters to petscdiff; the \$'...' quoting is kept literal here
# and is interpreted later when the diff command line is evaluated.
if test -n "$filter"; then
  diff_flags="${diff_flags} -F \$'$filter'"
fi
if test -n "$filter_output"; then
  diff_flags="${diff_flags} -f \$'$filter_output'"
fi


# Init
success=0; failed=0; failures=""; rmfiles=""
total=0
todo=-1; skip=-1
job_level=0

# -C: rebuild the test executable through gmakefile.test, using its path
# relative to petsc_dir as the make target.
if ${compile:-false}; then
  curexec=$(basename "${exec}")
  fullexec=${abspath_scriptdir}/${curexec}
  maketarget=$(printf '%s\n' "${fullexec}" | sed "s#${petsc_dir}/*##")
  (cd "$petsc_dir" && make -f gmakefile.test "${maketarget}")
fi

###
##   Rest of code is functions
#

#######################################
# Emit one TAP result line of the form "<not> ok <label> # <comment>".
# Globals:
#   testlogtapfile (read) - every line is appended to this file
#   testlogerrfile (read) - failures are appended here in err_only mode
#   output_fmt     (read) - "err_only" prints failures only
# Arguments:
#   $1 - "not" for a failure, empty for success
#   $2 - test label
#   $3 - optional comment, appended after " # "
# Outputs:
#   The TAP line on stdout (failures only when output_fmt is err_only).
#######################################
function petsc_report_tapoutput() {
  local notornot=$1
  local test_label=$2
  local comment=$3
  local tap_message

  if [ -n "${comment}" ]; then
    comment=" # ${comment}"
  fi

  # A successful result keeps the leading blank ("" + " ok ...").
  tap_message="${notornot} ok ${test_label}${comment}"

  # The message is data, never a format string: labels and comments (for
  # example a verbose command line) may contain '%' or backslashes.
  printf '%s\n' "${tap_message}" >> "${testlogtapfile}"

  if [ "${output_fmt}" = "err_only" ]; then
    if [ -n "${notornot}" ]; then
      printf '%s\n' "${tap_message}" | tee -a "${testlogerrfile}"
    fi
  else
    printf '%s\n' "${tap_message}"
  fi
}

#######################################
# Print a command in a form that can be run from PETSC_DIR, then exit.
# Globals:
#   petsc_dir        (read) - stripped from the front of the parent directory
#   mpiexec_function (read) - when true the command starts with one of the
#                             wrapper functions and is evaluated so that the
#                             wrapper can print its expanded form
# Arguments:
#   $1 - full command line, including output redirections
# Outputs:
#   The command with the first ".." replaced by the parent directory and
#   everything from the first '>' onwards removed.
# Note: this function does not return; it exits the shell.
#######################################
function printcmd() {
  # Print command that can be run from PETSC_DIR
  local cmd=$1
  local basedir modcmd

  basedir=$(dirname "${PWD}" | sed "s#${petsc_dir}/##")
  # ${cmd} is intentionally unquoted so that runs of blanks collapse.
  # shellcheck disable=SC2086
  modcmd=$(echo ${cmd} | sed -e "s#\.\.#${basedir}#" -e 's#>.*##')
  if ${mpiexec_function}; then
     # Have to expand valgrind/cudamemcheck
     modcmd=$(eval "${modcmd}")
  fi
  # Print as data.  Previously only the first '%' was escaped before the
  # string was used as a printf format, so two or more '%' broke the output.
  printf '%s\n' "${modcmd}"
  exit
}

#######################################
# Run one test command, record its timing, and report the result as TAP.
# Globals:
#   rmfiles, success, failed, total, failures (updated)
#   tlabel, error, cmd, cmd_res, comment, timed_out (set; left global as in
#     the original, in case a sourcing script reads them)
#   printcmd, verbose, testlogerrfile, MPIEXEC_TIMEOUT (read)
# Arguments:
#   $1 - basic command
#   $2 - stdout file
#   $3 - stderr file
#   $4 - label for reporting
#   $5 - if non-empty, the test checks error output: stderr is merged into
#        the stdout file and the exit code is ignored
# Side effects:
#   writes <label>.sh with the full command, and appends timings to
#   timing.out in the current directory
# Returns:
#   the (possibly adjusted) exit code of the command
#######################################
function petsc_testrun() {
  rmfiles="${rmfiles} $2 $3"
  tlabel=$4
  error=$5
  # Reset for every run: otherwise one timed-out test would make every later
  # failure in the same script be reported as a timeout, with its output
  # suppressed.
  timed_out=""

  if test -n "$error"; then
    cmd="$1 1> $2  2>&1"
  else
    cmd="$1 > $2 2> $3"
  fi
  printf '%s\n' "$cmd" > "${tlabel}.sh"; chmod 755 "${tlabel}.sh"
  if $printcmd; then
     printcmd "$cmd"
  fi

  eval "{ time -p $cmd ; } 2>> timing.out"
  cmd_res=$?
  # If testing the error output then we don't test the error code itself
  if test -n "$error"; then
     cmd_res=0
  fi
  #  If it is a lack of GPU resources or MPI failure (Intel) then try once more
  #  See: src/sys/error/err.c
  #  Error #134 added to handle problems with the Radeon card for hip testing
  #  Error #144 added to handle problems with the MPI [ch3:sock] received packet of unknown type (1852472100)
  case "${cmd_res}" in
    96|97|98|134|144)
      printf '# retrying %s\n' "${tlabel}" | tee -a "${testlogerrfile}"
      sleep 3
      eval "{ time -p $cmd ; } 2>> timing.out"
      cmd_res=$?
      ;;
  esac
  # Make sure both files exist so the greps and awks below cannot fail on a
  # missing file.
  touch "$2" "$3"
  # It appears current MPICH and Open MPI just shut down the job execution and do not return an error code to the executable
  # ETIMEDOUT=110 was used by Open MPI 3.0.  MPICH used 255
  # Earlier Open MPI versions returned 1 and the error string
  # Here we only grep for error strings in output
  if \
        grep -F -q -s 'I_MPI_JOB_TIMEOUT' "$2" "$3" || \
        grep -F -q -s 'APPLICATION TIMED OUT' "$2" "$3" || \
        grep -F -q -s MPIEXEC_TIMEOUT "$2" "$3" || \
        grep -F -q -s 'APPLICATION TERMINATED WITH THE EXIT STRING: job ending due to timeout' "$2" "$3" || \
        grep -q -s "Timeout after [0-9]* seconds. Terminating job" "$2" "$3"; then
    timed_out=1
    # If timed out, then ensure non-zero error code
    if [ "$cmd_res" -eq 0 ]; then
      cmd_res=1
    fi
  fi

  # Report errors
  comment=""
  if [ "$cmd_res" -eq 0 ]; then
    if "${verbose}"; then
      comment="${cmd}"
    fi
    petsc_report_tapoutput "" "$tlabel" "$comment"
    success=$(( success + 1 ))
  else
    if [ -n "$timed_out" ]; then
      comment="Exceeded timeout limit of $MPIEXEC_TIMEOUT s"
    else
      comment="Error code: ${cmd_res}"
    fi
    petsc_report_tapoutput "not" "$tlabel" "$comment"

    # Report errors in detail
    if [ -z "$timed_out" ]; then
      # We've had tests fail but stderr->stdout, as well as having
      # mpi_abort go to stderr which throws this test off.  Show both
      # with stdout first
      awk '{print "#\t" $0}' < "$2" | tee -a "${testlogerrfile}"
      # if statement is for diff tests
      if test "$2" != "$3"; then
        awk '{print "#\t" $0}' < "$3" | tee -a "${testlogerrfile}"
      fi
    fi
    failed=$(( failed + 1 ))
    failures="$failures $tlabel"
  fi
  total=$(( success + failed ))
  return $cmd_res
}

#######################################
# Write the per-test summary file and optionally remove generated files.
# Globals:
#   label, total, success, failed, failures, todo, skip (read)
#   cleanup, rmfiles (read)
#   logfile, logdir, ENDTIME, timing (set)
# Arguments:
#   $1 - directory under which counts/<label>.counts is written
# Side effects:
#   creates timing.out in the current directory if it does not exist
#######################################
function petsc_testend() {
  logfile=$1/counts/${label}.counts
  logdir=$(dirname "$logfile")
  mkdir -p "$logdir"

  ENDTIME=$(date +%s)
  # Largest single user/sys time and the sum of all of them, taken from the
  # "time -p" records collected in timing.out.  The maximum is kept as a
  # number: comparing a field against an sprintf() result is a string
  # comparison in awk, which ranked "10.00" below "9.00".
  timing=$(touch timing.out && grep -E '(user|sys)' timing.out | awk '
    {
      t = sprintf("%.2f", $2) + 0
      if (NR == 1 || t > max) { max = t }
      sum += t
    }
    END { printf "%.2f %.2f\n", max, sum }')

  {
    printf 'total %s\n' "$total"
    printf 'success %s\n' "$success"
    printf 'failed %s\n' "$failed"
    printf 'failures %s\n' "$failures"
    if [ "${todo}" -gt 0 ]; then
      printf 'todo %s\n' "$todo"
    fi
    if [ "${skip}" -gt 0 ]; then
      printf 'skip %s\n' "$skip"
    fi
    printf 'time %s\n' "$timing"
  } > "$logfile"

  if $cleanup; then
    echo "Cleaning up"
    # rmfiles is a whitespace-separated list, so it must stay unquoted.
    # shellcheck disable=SC2086
    /bin/rm -f $rmfiles
  fi
}

#######################################
# Launcher wrapper: walks the argument list up to the first argument that
# matches ${executable} and inserts the CUDA memory checker in front of it.
# Globals:
#   PETSC_CUDAMEMCHECK_COMMAND (read) - overrides the checker binary
#   PETSC_CUDAMEMCHECK_ARGS    (read) - overrides the checker arguments
#   executable, printcmd       (read)
#   memcheck_cmd, memcheck_args, pre_args, re (set)
# Arguments:
#   the complete launcher command line
# Outputs:
#   with printcmd true, the rewritten command line; otherwise the output of
#   the command with the checker's banner and zero-count summaries removed
#######################################
function petsc_mpiexec_cudamemcheck() {
  # Prefer compute-sanitizer when it is installed; cuda-memcheck is
  # deprecated from CUDA 11-ish onwards.
  if command -v compute-sanitizer &> /dev/null; then
    memcheck_cmd="${PETSC_CUDAMEMCHECK_COMMAND:-compute-sanitizer}"
    declare -a default_args_to_check=('--target-processes all' '--track-stream-ordered-races all')
  else
    memcheck_cmd="${PETSC_CUDAMEMCHECK_COMMAND:-cuda-memcheck}"
    declare -a default_args_to_check=('--flush-to-disk yes')
  fi

  if [[ -n ${PETSC_CUDAMEMCHECK_ARGS} ]]; then
    # The user supplied the checker arguments; use them as they are.
    memcheck_args="${PETSC_CUDAMEMCHECK_ARGS}"
  else
    # Start from the fixed defaults and keep each optional argument only if
    # the checker accepts it.
    memcheck_args='--leak-check full --report-api-errors no '
    for option in "${default_args_to_check[@]}"; do
      if ${memcheck_cmd} ${memcheck_args} ${option} &> /dev/null; then
        memcheck_args+="${option} "
      fi
    done
  fi

  # Everything before the test executable belongs to the launcher; the
  # checker command goes right after those arguments.  The first argument
  # matching the pattern is taken to be the executable.
  pre_args=()
  re="${executable}"
  while [[ $# -gt 0 ]]; do
    if [[ $1 =~ ${re} ]]; then
      pre_args+=("${memcheck_cmd} ${memcheck_args}")
      break
    fi
    pre_args+=("$1")
    shift
  done

  # pre_args is expanded unquoted on purpose: the checker entry holds the
  # command and its arguments in a single element that must be word-split.
  if ${printcmd}; then
    echo ${pre_args[@]} "$@"
  else
    # Drop the checker banner lines and the "nothing found" summaries.  grep
    # exits with 1 when nothing is left to print, which is not an error here.
    ${pre_args[@]} "$@" \
      | grep -v \
          -e 'CUDA-MEMCHECK' \
          -e 'COMPUTE-SANITIZER' \
          -e 'LEAK SUMMARY: 0 bytes leaked in 0 allocations' \
          -e 'ERROR SUMMARY: 0 errors' || [[ $? == 1 ]]
  fi
}

#######################################
# Launcher wrapper: inserts a valgrind memcheck invocation in front of the
# first argument that matches ${executable}.
# Globals:
#   PETSC_DIR (read) - location of the valgrind suppressions file
#   executable, printcmd (read)
#   valgrind_cmd, pre_args, re (set)
# Arguments:
#   the complete launcher command line
# Outputs:
#   with printcmd true, the rewritten command line; otherwise whatever the
#   rewritten command prints
#######################################
function petsc_mpiexec_valgrind() {
  valgrind_cmd="valgrind -q --tool=memcheck --leak-check=yes --num-callers=20 --track-origins=yes --keep-debuginfo=yes --suppressions=${PETSC_DIR}/share/petsc/suppressions/valgrind --error-exitcode=10"

  # Collect the launcher's own arguments; stop at the test executable and put
  # the valgrind command there.  Arguments still left in "$@" afterwards are
  # the executable and its options.
  pre_args=()
  re="${executable}"
  while [[ $# -gt 0 ]]; do
    if [[ $1 =~ ${re} ]]; then
      pre_args+=("${valgrind_cmd}")
      break
    fi
    pre_args+=("$1")
    shift
  done

  # pre_args is expanded unquoted on purpose so that the single valgrind
  # element is split into the command and its options.
  if ${printcmd}; then
    echo ${pre_args[@]} "$@"
  else
    ${pre_args[@]} "$@"
  fi
}
# Run every tool the harness starts in the plain C locale.
LC_ALL=C
export LC_ALL
369