# xref: /petsc/config/petsc_harness.sh (revision 9fa15c4dcfb14eef1e694c8c4e3db8b9dee7e944)
# Name of the running script and the per-test run directory derived from it
# (script name with a trailing ".sh" removed).
scriptname=$(basename "$0")
rundir=${scriptname%.sh}
# Default timeout in seconds; can be overridden with -t.
TIMEOUT=60

# Always work from the directory that holds the script.  The change of
# directory is unconditional on purpose: abspath_scriptdir is needed later by
# the compile step and must be set even when we already are in that directory.
cd "$(dirname "$0")" || exit 1
abspath_scriptdir=$PWD

# Remove stale results from a previous run, but only from an existing,
# non-empty-named run directory so the globs can never expand at "/".
if [[ -n "${rundir}" && -d "${rundir}" ]]; then
  rm -f "${rundir}"/*.tmp "${rundir}"/*.err "${rundir}"/*.out
fi
mkdir -p "${rundir}"

# Copy any auxiliary files into the run directory, keeping their relative
# sub-directory layout.  runfiles is not set in this file (presumably it is
# provided by the script that includes the harness) and is a
# whitespace-separated list, so it is split on purpose.
if [[ -n "${runfiles}" ]]; then
  # shellcheck disable=SC2086
  for runfile in ${runfiles}; do
    subdir=$(dirname "${runfile}")
    mkdir -p "${rundir}/${subdir}"
    cp -r "${runfile}" "${rundir}/${subdir}"
  done
fi
# Everything below runs inside the run directory; give up if we cannot enter it.
cd "${rundir}" || exit 1
21
#
# Print the general options, followed by any script-specific options, and exit.
#
# Arguments: $1 - exit status to terminate with (defaults to 0 when omitted)
# Outputs:   the usage text on stderr; if a function named extrausage is
#            defined it is called afterwards to add its own text
# Note:      this function never returns, it always exits the shell.
#
print_usage() {

  cat >&2 <<EOF
Usage: $0 [options]

OPTIONS
  -a <args> ......... Override default arguments
  -c ................ Cleanup (remove generated files)
  -C ................ Compile
  -d ................ Launch in debugger
  -e <args> ......... Add extra arguments to default
  -E <args> ......... Add final arguments to default
  -f ................ force attempt to run test that would otherwise be skipped
  -h ................ help: print this message
  -n <integer> ...... Override the number of processors to use
  -j ................ Pass -j to petscdiff (just use diff)
  -J <arg> .......... Pass -J to petscdiff (just use diff with arg)
  -m ................ Update results using petscdiff
  -M ................ Update alt files using petscdiff
  -o <arg> .......... Output format: 'interactive', 'err_only'
  -p ................ Print command:  Print first command and exit
  -t <seconds> ...... Override the default timeout (default=$TIMEOUT sec)
  -U ................ run cUda-memcheck
  -V ................ run Valgrind
  -v ................ Verbose: Print commands
EOF

  if declare -f extrausage > /dev/null; then extrausage; fi
  # An explicit default keeps the exit status independent of whatever ran last.
  exit "${1:-0}"
}
55###
56##  Arguments for overriding things
57#
# Defaults for everything the command-line options can override.
output_fmt="interactive"
verbose=false
cleanup=false
compile=false
debugger=false
printcmd=false
mpiexec_function=false
force=false
diff_flags=""
while getopts "a:cCde:E:fhjJ:mMn:o:pt:UvV" arg; do
  case "${arg}" in
    a) args="${OPTARG}" ;;
    c) cleanup=true ;;
    C) compile=true ;;
    d) debugger=true ;;
    e) extra_args="${OPTARG}" ;;
    E) final_args="${OPTARG}" ;;
    f) force=true ;;
    h) print_usage; exit ;;
    n) nsize="${OPTARG}" ;;
    # j/J/m/M are accumulated and handed on as diff flags.
    j) diff_flags="${diff_flags} -j" ;;
    J) diff_flags="${diff_flags} -J ${OPTARG}" ;;
    m) diff_flags="${diff_flags} -m" ;;
    M) diff_flags="${diff_flags} -M" ;;
    o) output_fmt="${OPTARG}" ;;
    p) printcmd=true ;;
    t) TIMEOUT="${OPTARG}" ;;
    # -U / -V put a wrapper function in front of the launcher command and
    # record that the launcher is now a shell function.
    U)
      mpiexec="petsc_mpiexec_cudamemcheck ${mpiexec}"
      mpiexec_function=true
      ;;
    V)
      mpiexec="petsc_mpiexec_valgrind ${mpiexec}"
      mpiexec_function=true
      ;;
    v) verbose=true ;;
    *)
      # To take care of any extra args: store the option under a variable of
      # the same name (its argument, or "found" for a plain flag).  For an
      # unrecognised option getopts sets arg to "?", which is not a valid
      # variable name; getopts has already printed a diagnostic, so skip it
      # instead of trying to execute "?=found".
      if [[ "${arg}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
        printf -v "${arg}" '%s' "${OPTARG:-found}"
      fi
      ;;
  esac
done
shift $((OPTIND - 1))
103
# Individual tests can extend the default timeout by setting timeoutfactor.
# When it is not set the factor is 1; without that default an unset factor
# would evaluate to 0 and collapse the timeout to 0 seconds.
export MPIEXEC_TIMEOUT=$((TIMEOUT * ${timeoutfactor:-1}))
STARTTIME=$(date +%s)

# Assemble the final argument string: extra args go in front of the defaults,
# final args behind them, and the debugger flag in front of everything.
if [[ -n "${extra_args}" ]]; then
  args="${extra_args} ${args}"
fi
if [[ -n "${final_args}" ]]; then
  args="${args} ${final_args}"
fi
# Compare against the literal value so that an unset flag counts as "off".
if [[ "${debugger}" == true ]]; then
  args="-start_in_debugger ${args}"
fi
# filter / filter_output are not set in this file; when present they are
# forwarded as -F / -f, wrapped in $'...' so the quoting survives a later eval.
if [[ -n "${filter}" ]]; then
  diff_flags="${diff_flags} -F \$'${filter}'"
fi
if [[ -n "${filter_output}" ]]; then
  diff_flags="${diff_flags} -f \$'${filter_output}'"
fi

# Init: counters and lists updated by the test functions below.
success=0
failed=0
failures=""
rmfiles=""
total=0
todo=-1
skip=-1
job_level=0
129
# Optional compile step (-C): build the test executable through the test
# makefile in petsc_dir before running anything.
if [[ "${compile}" == true ]]; then
  curexec=$(basename "${exec}")
  fullexec="${abspath_scriptdir}/${curexec}"
  # The make target is the executable path relative to petsc_dir.  Strip the
  # prefix with parameter expansion so that characters in petsc_dir are never
  # interpreted as a sed pattern, then drop the slashes that separated them.
  maketarget="${fullexec}"
  if [[ -n "${petsc_dir}" && "${fullexec}" == "${petsc_dir}"* ]]; then
    maketarget="${fullexec#"${petsc_dir}"}"
    maketarget="${maketarget#"${maketarget%%[!/]*}"}"
  fi
  # Subshell keeps the current directory of the harness unchanged.
  (cd "${petsc_dir}" && make -f gmakefile.test "${maketarget}")
fi
136
137###
138##   Rest of code is functions
139#
#######################################
# Emit one TAP result line ("ok ..." / "not ok ...") for a test.
# Globals:   testlogtapfile (appended to), testlogerrfile (appended to in
#            err_only mode), output_fmt (read)
# Arguments: $1 - "" for a passing test, otherwise the prefix (e.g. "not")
#            $2 - test label
#            $3 - optional comment, appended as " # <comment>"
# Outputs:   the TAP line on stdout; in err_only mode only non-passing
#            lines are printed (and also appended to testlogerrfile)
#######################################
function petsc_report_tapoutput() {
  local notornot=$1
  local test_label=$2
  local comment=$3
  local tap_message

  if [[ -n "${comment}" ]]; then
    comment=" # ${comment}"
  fi
  tap_message="${notornot} ok ${test_label}${comment}"

  # Log messages.  The text is passed as data ('%s'), never as the printf
  # format, so '%' or backslashes in a label or command are kept verbatim.
  printf '%s\n' "${tap_message}" >> "${testlogtapfile}"

  if [[ "${output_fmt}" == "err_only" ]]; then
    if [[ -n "${notornot}" ]]; then
      printf '%s\n' "${tap_message}" | tee -a "${testlogerrfile}"
    fi
  else
    printf '%s\n' "${tap_message}"
  fi
}
161
#######################################
# Print a command in a form that can be run from PETSC_DIR, then exit.
# Globals:   PWD, petsc_dir, mpiexec_function (all read)
# Arguments: $1 - the command line, including its output redirections
# Outputs:   the rewritten command on stdout
# Note:      never returns; it exits the shell after printing.
#######################################
function printcmd() {
  local cmd=$1
  local parent_dir basedir modcmd

  # Parent of the current directory, relative to petsc_dir.
  parent_dir=$(dirname "${PWD}")
  basedir=${parent_dir#"${petsc_dir}/"}

  # Replace the first ".." with that directory and drop everything from the
  # first redirection onwards.  The command is fed to sed as data so it is
  # neither glob-expanded nor re-split.
  modcmd=$(printf '%s\n' "${cmd}" | sed -e "s#\.\.#${basedir}#" -e 's#>.*##')

  if [[ "${mpiexec_function}" == true ]]; then
    # Have to expand valgrind/cudamemcheck: the launcher is a shell function
    # that prints the real command when printcmd is enabled.
    modcmd=$(eval "${modcmd}")
  fi
  # Printed with '%s' so that every '%' and backslash in the command is
  # emitted literally.
  printf '%s\n' "${modcmd}"
  exit
}
174
#######################################
# Run one test command, record its output and report the result as TAP.
# Globals:   rmfiles, success, failed, failures, total (updated);
#            tlabel, error, cmd, cmd_res, timed_out, comment (set);
#            printcmd, verbose, testlogerrfile, MPIEXEC_TIMEOUT (read)
# Arguments: $1 - basic command
#            $2 - stdout file
#            $3 - stderr file
#            $4 - label for reporting
#            $5 - optional; when non-empty stderr is merged into the stdout
#                 file and the command's exit status is ignored
# Outputs:   the TAP line and, for non-timeout failures, the captured
#            output prefixed with "#<TAB>"; also writes <label>.sh and
#            appends timings to timing.out
# Returns:   the (possibly adjusted) exit status of the command
#######################################
function petsc_testrun() {
  rmfiles="${rmfiles} $2 $3"
  tlabel=$4
  error=$5
  # Reset per call: a timeout seen in an earlier test must not make later,
  # unrelated failures look like timeouts.
  timed_out=""
  cmd="$1 > $2 2> $3"
  if [[ -n "${error}" ]]; then
    cmd="$1 1> $2  2>&1"
  fi
  # Keep a runnable copy of the exact command next to the results.
  printf '%s\n' "${cmd}" > "${tlabel}.sh"
  chmod 755 "${tlabel}.sh"
  if [[ "${printcmd}" == true ]]; then
    printcmd "${cmd}"
  fi

  eval "{ time -p $cmd ; } 2>> timing.out"
  cmd_res=$?
  # If testing the error output then we don't test the error code itself
  if [[ -n "${error}" ]]; then
    cmd_res=0
  fi
  #  If it is a lack of GPU resources or MPI failure (Intel) then try once more
  #  See: src/sys/error/err.c
  #  Error #134 added to handle problems with the Radeon card for hip testing
  #  Error #144 added to handle problems with the MPI [ch3:sock] received packet of unknown type (1852472100)
  case "${cmd_res}" in
    96 | 97 | 98 | 134 | 144)
      printf '# retrying %s\n' "${tlabel}" | tee -a "${testlogerrfile}"
      sleep 3
      eval "{ time -p $cmd ; } 2>> timing.out"
      cmd_res=$?
      ;;
  esac
  # Make sure both files exist so the greps and awks below have input.
  touch "$2" "$3"
  # It appears current MPICH and Open MPI just shut down the job execution and do not return an error code to the executable
  # ETIMEDOUT=110 was used by Open MPI 3.0.  MPICH used 255
  # Earlier Open MPI versions returned 1 and the error string
  # Here we only grep for error strings in output
  if grep -F -q -s 'I_MPI_JOB_TIMEOUT' "$2" "$3" ||
    grep -F -q -s 'APPLICATION TIMED OUT' "$2" "$3" ||
    grep -F -q -s MPIEXEC_TIMEOUT "$2" "$3" ||
    grep -F -q -s 'APPLICATION TERMINATED WITH THE EXIT STRING: job ending due to timeout' "$2" "$3" ||
    grep -q -s "Timeout after [0-9]* seconds. Terminating job" "$2" "$3"; then
    timed_out=1
    # If timed out, then ensure non-zero error code
    if [[ "${cmd_res}" -eq 0 ]]; then
      cmd_res=1
    fi
  fi

  # Report errors
  comment=""
  if [[ "${cmd_res}" -eq 0 ]]; then
    if [[ "${verbose}" == true ]]; then
      comment="${cmd}"
    fi
    petsc_report_tapoutput "" "${tlabel}" "${comment}"
    success=$((success + 1))
  else
    if [[ -n "${timed_out}" ]]; then
      comment="Exceeded timeout limit of $MPIEXEC_TIMEOUT s"
    else
      comment="Error code: ${cmd_res}"
    fi
    petsc_report_tapoutput "not" "${tlabel}" "${comment}"

    # Report errors in detail
    if [[ -z "${timed_out}" ]]; then
      # We've had tests fail but stderr->stdout, as well as having
      # mpi_abort go to stderr which throws this test off.  Show both
      # with stdout first
      awk '{print "#\t" $0}' < "$2" | tee -a "${testlogerrfile}"
      # if statement is for diff tests
      if [[ "$2" != "$3" ]]; then
        awk '{print "#\t" $0}' < "$3" | tee -a "${testlogerrfile}"
      fi
    fi
    failed=$((failed + 1))
    failures="${failures} ${tlabel}"
  fi
  total=$((success + failed))
  return "${cmd_res}"
}
260
#######################################
# Write the summary counts for this test script and optionally clean up.
# Globals:   label, total, success, failed, failures, todo, skip,
#            cleanup, rmfiles (read); ENDTIME (set)
# Arguments: $1 - directory under which counts/<label>.counts is written
# Outputs:   overwrites the counts file; reads (and creates if missing)
#            timing.out in the current directory; prints "Cleaning up"
#            and removes the files listed in rmfiles when cleanup is true
#######################################
function petsc_testend() {
  local logfile="$1/counts/${label}.counts"
  local logdir timing
  logdir=$(dirname "${logfile}")
  mkdir -p "${logdir}"

  ENDTIME=$(date +%s)
  # From the user/sys lines of timing.out: first number is the largest single
  # value seen, second is the sum of all of them.
  timing=`touch timing.out && grep -E '(user|sys)' timing.out | awk '{if( sum1 == "" || $2 > sum1 ) { sum1=sprintf("%.2f",$2) } ; sum2 += sprintf("%.2f",$2)} END {printf "%.2f %.2f\n",sum1,sum2}'`

  # Values are passed as printf arguments, never as the format string.
  {
    printf 'total %s\n' "${total}"
    printf 'success %s\n' "${success}"
    printf 'failed %s\n' "${failed}"
    printf 'failures %s\n' "${failures}"
    # todo / skip are only recorded when positive.
    if [[ "${todo}" -gt 0 ]]; then
      printf 'todo %s\n' "${todo}"
    fi
    if [[ "${skip}" -gt 0 ]]; then
      printf 'skip %s\n' "${skip}"
    fi
    printf 'time %s\n' "${timing}"
  } > "${logfile}"

  if [[ "${cleanup}" == true ]]; then
    echo "Cleaning up"
    # rmfiles is a whitespace-separated list, so it is split on purpose.
    # shellcheck disable=SC2086
    /bin/rm -f ${rmfiles}
  fi
}
288
#######################################
# Launcher wrapper: run (or print) a launch command with the CUDA memory
# checker inserted in front of the test executable.
# Globals:   executable, printcmd, PETSC_CUDAMEMCHECK_COMMAND,
#            PETSC_CUDAMEMCHECK_ARGS (all read)
# Arguments: the full launch command line, one word per argument
# Outputs:   with printcmd enabled, the resulting command line on stdout;
#            otherwise the command's stdout with the checker's banner and
#            "nothing found" summary lines removed
# Returns:   0 when every output line was filtered away, otherwise the
#            status of the filter pipeline
#######################################
function petsc_mpiexec_cudamemcheck() {
  local memcheck_cmd memcheck_args option re i
  local -a default_args_to_check pre_args memcheck_words

  # first check if compute-sanitizer exists, since cuda-memcheck is deprecated from CUDA
  # 11-ish onwards
  if command -v compute-sanitizer &> /dev/null; then
    memcheck_cmd="${PETSC_CUDAMEMCHECK_COMMAND:-compute-sanitizer}"
    default_args_to_check=('--target-processes all' '--track-stream-ordered-races all')
  else
    memcheck_cmd="${PETSC_CUDAMEMCHECK_COMMAND:-cuda-memcheck}"
    default_args_to_check=('--flush-to-disk yes')
  fi
  if [[ -z "${PETSC_CUDAMEMCHECK_ARGS}" ]]; then
    # if user has not set the memcheck args themselves loop over the predefined default
    # arguments and check if they can be used
    memcheck_args='--leak-check full --report-api-errors no '
    for option in "${default_args_to_check[@]}"; do
      # Command, arguments and option are space-separated word lists and are
      # split on purpose; the option is kept only if the tool accepts it.
      # shellcheck disable=SC2086
      if ${memcheck_cmd} ${memcheck_args} ${option} &> /dev/null; then
        memcheck_args+="${option} "
      fi
    done
  else
    memcheck_args="${PETSC_CUDAMEMCHECK_ARGS}"
  fi

  # Split the checker command line into words once (no glob expansion), so
  # the final command can be run from a properly quoted array.
  read -r -a memcheck_words <<< "${memcheck_cmd} ${memcheck_args}"

  pre_args=()
  # regex to detect where the test lives in the command line. This
  # marks the end of the options to the launcher, and hence where we should
  # insert the memory-checker command
  re="${executable}"
  for i in "$@"; do
    if [[ $i =~ ${re} ]]; then
      # found it, put cuda memcheck command in
      pre_args+=("${memcheck_words[@]}")
      break
    fi
    # Launcher words are moved over one by one; "$@" is left holding the
    # executable and everything after it.
    pre_args+=("$i")
    shift
  done

  # run command, but filter out
  # ===== CUDA-MEMCHECK or ==== COMPUTE-SANITIZER
  # and
  # ===== ERROR SUMMARY: 0 errors
  if [[ "${printcmd}" == true ]]; then
    echo "${pre_args[@]}" "$@"
  else
    # The trailing "|| [[ $? == 1 ]]" suppresses grep's exit code 1, which
    # only means that no line was left to print.
    "${pre_args[@]}" "$@" \
      | grep -v 'CUDA-MEMCHECK' \
      | grep -v 'COMPUTE-SANITIZER' \
      | grep -v 'LEAK SUMMARY: 0 bytes leaked in 0 allocations' \
      | grep -v 'ERROR SUMMARY: 0 errors' || [[ $? == 1 ]]
  fi
}
346
#######################################
# Launcher wrapper: run (or print) a launch command with valgrind inserted
# in front of the test executable.
# Globals:   executable, printcmd, PETSC_DIR (all read)
# Arguments: the full launch command line, one word per argument
# Outputs:   with printcmd enabled, the resulting command line on stdout;
#            otherwise whatever the launched command prints
# Returns:   the exit status of the launched command (or of echo)
#######################################
function petsc_mpiexec_valgrind() {
  local re i
  local -a pre_args
  # Kept as an array so each option stays one word, including the
  # suppressions path when PETSC_DIR contains spaces.
  local -a valgrind_cmd=(
    valgrind -q --tool=memcheck --leak-check=yes --num-callers=20
    --track-origins=yes --keep-debuginfo=yes
    "--suppressions=${PETSC_DIR}/share/petsc/suppressions/valgrind"
    --error-exitcode=10
  )

  pre_args=()
  # The first argument matching this regex is taken to be the test
  # executable; everything before it belongs to the launcher.
  re="${executable}"
  for i in "$@"; do
    if [[ $i =~ ${re} ]]; then
      pre_args+=("${valgrind_cmd[@]}")
      break
    fi
    # Launcher words are moved over one by one; "$@" is left holding the
    # executable and everything after it.
    pre_args+=("$i")
    shift
  done

  if [[ "${printcmd}" == true ]]; then
    echo "${pre_args[@]}" "$@"
  else
    "${pre_args[@]}" "$@"
  fi
}
# Export LC_ALL=C to every command started from here on (presumably so that
# tool output and sorting do not vary with the user's locale).
LC_ALL=C
export LC_ALL
366