#
# Shared driver code for a single test script.
#
# What this code does, top to bottom:
#   1. changes into the directory holding the script, (re)creates a run
#      directory named after the script (script name minus ".sh"), copies any
#      requested run files into it and changes into it;
#   2. parses the command-line options documented in print_usage;
#   3. defines the helper functions used to run commands and report results in
#      TAP form (petsc_testrun, petsc_report_tapoutput, petsc_testend, ...).
#
# The following variables are read here but never assigned here, so they must
# be provided by whoever runs/sources this code:
#   runfiles, timeoutfactor, filter, filter_output, exec, petsc_dir, label,
#   executable, testlogtapfile, testlogerrfile, PETSC_DIR
#
# NOTE: function variables are intentionally NOT declared `local`; the
# original code kept them global and calling scripts may read them.
#
scriptname=$(basename -- "$0")
rundir=${scriptname%.sh}
TIMEOUT=60

# Always work from the directory that holds the script, and remember its
# absolute path (needed by the -C compile option below).  The original guarded
# this with `test "$PWD"!=...`, which (lacking spaces around !=) was a
# non-empty-string test and therefore always true; do it unconditionally.
cd "$(dirname -- "$0")" || exit 1
abspath_scriptdir=$PWD

# Remove stale outputs of a previous run.
if test -n "${rundir}" && test -d "${rundir}"; then
  rm -f "${rundir}"/*.tmp "${rundir}"/*.err "${rundir}"/*.out
fi
mkdir -p "${rundir}"
if test -n "${runfiles}"; then
  # ${runfiles} is a whitespace-separated list: splitting is intentional.
  for runfile in ${runfiles}; do
    subdir=$(dirname "${runfile}")
    mkdir -p "${rundir}/${subdir}"
    cp -r "${runfile}" "${rundir}/${subdir}"
  done
fi
# Everything below writes into the run directory; do not continue elsewhere.
cd "${rundir}" || exit 1

#
# Method to print out general and script specific options
#
# Arguments: $1 - exit status (optional; a bare `exit` is used when omitted)
# Outputs:   usage text on stderr, followed by the output of `extrausage`
#            when a function of that name is defined
#
print_usage() {

cat >&2 <<EOF
Usage: $0 [options]

OPTIONS
  -a <args> ......... Override default arguments
  -c ................ Cleanup (remove generated files)
  -C ................ Compile
  -d ................ Launch in debugger
  -e <args> ......... Add extra arguments to default
  -E <args> ......... Add final arguments to default
  -f ................ force attempt to run test that would otherwise be skipped
  -h ................ help: print this message
  -n <integer> ...... Override the number of processors to use
  -j ................ Pass -j to petscdiff (just use diff)
  -J <arg> .......... Pass -J to petscdiff (just use diff with arg)
  -m ................ Update results using petscdiff
  -M ................ Update alt files using petscdiff
  -o <arg> .......... Output format: 'interactive', 'err_only'
  -p ................ Print command:  Print first command and exit
  -t ................ Override the default timeout (default=$TIMEOUT sec)
  -U ................ run cUda-memcheck
  -V ................ run Valgrind
  -v ................ Verbose: Print commands
EOF

  if declare -f extrausage > /dev/null; then extrausage; fi
  # $1 is deliberately unquoted: with no argument this must be a bare `exit`.
  # shellcheck disable=SC2086
  exit $1
}
###
##  Arguments for overriding things
#
output_fmt="interactive"
verbose=false
cleanup=false
compile=false
debugger=false
printcmd=false
mpiexec_function=false
force=false
diff_flags=""
while getopts "a:cCde:E:fhjJ:mMn:o:pt:UvV" arg
do
  case $arg in
    a ) args="$OPTARG"       ;;
    c ) cleanup=true         ;;
    C ) compile=true         ;;
    d ) debugger=true        ;;
    e ) extra_args="$OPTARG" ;;
    E ) final_args="$OPTARG" ;;
    f ) force=true           ;;
    h ) print_usage; exit    ;;
    n ) nsize="$OPTARG"      ;;
    j ) diff_flags=$diff_flags" -j"      ;;
    J ) diff_flags=$diff_flags" -J $OPTARG" ;;
    m ) diff_flags=$diff_flags" -m"      ;;
    M ) diff_flags=$diff_flags" -M"      ;;
    o ) output_fmt=$OPTARG   ;;
    p ) printcmd=true        ;;
    t ) TIMEOUT=$OPTARG      ;;
    U ) mpiexec="petsc_mpiexec_cudamemcheck $mpiexec"
        mpiexec_function=true
        ;;
    V ) mpiexec="petsc_mpiexec_valgrind $mpiexec"
        mpiexec_function=true
        ;;
    v ) verbose=true         ;;
    *)  # To take care of any extra args
      # NOTE(review): kept as-is.  For an option not in the getopts string,
      # $arg is '?', so this eval cannot create a usable variable - confirm
      # the intent before relying on it.
      if test -n "$OPTARG"; then
        eval $arg=\"$OPTARG\"
      else
        eval $arg=found
      fi
      ;;
  esac
done
shift $(( OPTIND - 1 ))

# Individual tests can extend the default
# (an unset/empty timeoutfactor counts as 1 instead of collapsing the
# timeout to 0)
export MPIEXEC_TIMEOUT=$(( TIMEOUT * ${timeoutfactor:-1} ))
STARTTIME=$(date +%s)

if test -n "$extra_args"; then
  args="$extra_args $args"
fi
if test -n "$final_args"; then
  args="$args $final_args"
fi
if $debugger; then
  args="-start_in_debugger $args"
fi
if test -n "$filter"; then
  diff_flags=$diff_flags" -F \$'$filter'"
fi
if test -n "$filter_output"; then
  diff_flags=$diff_flags" -f \$'$filter_output'"
fi

# Init
success=0; failed=0; failures=""; rmfiles=""
total=0
todo=-1; skip=-1
job_level=0

if $compile; then
  curexec=$(basename "${exec}")
  fullexec=${abspath_scriptdir}/${curexec}
  # Make target = executable path relative to ${petsc_dir}
  maketarget=$(echo "${fullexec}" | sed "s#${petsc_dir}/*##")
  (cd "$petsc_dir" && make -f gmakefile.test "${maketarget}")
fi

###
##   Rest of code is functions
#

#######################################
# Emit one TAP result line.
# Arguments: $1 - "" for a passing test, otherwise the prefix (e.g. "not")
#            $2 - test label
#            $3 - optional comment, appended as " # <comment>"
# Globals:   output_fmt, testlogtapfile, testlogerrfile (read)
# Outputs:   the line is always appended to ${testlogtapfile}.  With
#            output_fmt=err_only only non-passing lines are printed (and
#            appended to ${testlogerrfile}); otherwise every line is printed.
#######################################
petsc_report_tapoutput() {
  notornot=$1
  test_label=$2
  comment=$3
  if test -n "$comment"; then
    comment=" # ${comment}"
  fi

  tap_message="${notornot} ok ${test_label}${comment}"

  # Log messages
  # The message is data, never a printf format: a comment may contain '%'
  # (e.g. the command line echoed in verbose mode).
  printf '%s\n' "${tap_message}" >> "${testlogtapfile}"

  if test "${output_fmt}" == "err_only"; then
    if test -n "${notornot}"; then
      printf '%s\n' "${tap_message}" | tee -a "${testlogerrfile}"
    fi
  else
    printf '%s\n' "${tap_message}"
  fi
}

#######################################
# Print command that can be run from PETSC_DIR, then exit.
# Arguments: $1 - command line as built by petsc_testrun
# Globals:   petsc_dir, mpiexec_function (read)
#######################################
printcmd() {
  cmd="$1"
  basedir=$(dirname "${PWD}" | sed "s#${petsc_dir}/##")
  # Replace the first ".." by the directory relative to petsc_dir, drop the
  # redirections, and double EVERY '%' because the result is used as a printf
  # format below (the original escaped only the first one).
  # ${cmd} is unquoted on purpose, as in the original (whitespace collapses).
  # shellcheck disable=SC2086
  modcmd=$(echo ${cmd} | sed -e "s#\.\.#${basedir}#" | sed 's#>.*##' | sed 's#%#%%#g')
  if $mpiexec_function; then
    # Have to expand valgrind/cudamemcheck
    modcmd=$(eval "$modcmd")
  fi
  # shellcheck disable=SC2059
  printf "${modcmd}\n"
  exit
}

#######################################
# Run one command, classify the result and report it in TAP form.
# Arguments: $1 - Basic command
#            $2 - stdout file
#            $3 - stderr file
#            $4 - label for reporting
#            $5 - if non-empty, stderr is merged into the stdout file and the
#                 exit code is ignored (the error output itself is tested)
# Globals:   success, failed, failures, total, rmfiles, timed_out (written)
# Outputs:   TAP line via petsc_report_tapoutput; on a non-timeout failure
#            the captured output prefixed with "#<TAB>"; timings are appended
#            to timing.out; the command is saved in <label>.sh
# Returns:   the (possibly adjusted) exit code of the command
#######################################
petsc_testrun() {
  rmfiles="${rmfiles} $2 $3"
  tlabel=$4
  error=$5
  # Start every run with a clean timeout flag.  Without this, one timed-out
  # run made every later failing run in the same script be reported as a
  # timeout (and its output be suppressed).
  timed_out=""
  cmd="$1 > $2 2> $3"
  if test -n "$error"; then
    cmd="$1 1> $2 2>&1"
  fi
  echo "$cmd" > "${tlabel}.sh"; chmod 755 "${tlabel}.sh"
  if $printcmd; then
    printcmd "$cmd"
  fi

  eval "{ time -p $cmd ; } 2>> timing.out"
  cmd_res=$?
  # If testing the error output then we don't test the error code itself
  if test -n "$error"; then
    cmd_res=0
  fi
  #  If it is a lack of GPU resources or MPI failure (Intel) then try once more
  #  See: src/sys/error/err.c
  #  Error #134 added to handle problems with the Radeon card for hip testing
  #  Error #144 added to handle problems with the MPI [ch3:sock] received packet of unknown type (1852472100)
  case $cmd_res in
    96 | 97 | 98 | 134 | 144)
      printf '# retrying %s\n' "${tlabel}" | tee -a "${testlogerrfile}"
      sleep 3
      eval "{ time -p $cmd ; } 2>> timing.out"
      cmd_res=$?
      ;;
  esac
  touch "$2" "$3"
  # It appears current MPICH and Open MPI just shut down the job execution and do not return an error code to the executable
  # ETIMEDOUT=110 was used by Open MPI 3.0.  MPICH used 255
  # Earlier Open MPI versions returned 1 and the error string
  # Here we only grep for error strings in output
  #if [ $cmd_res -eq 110 -o $cmd_res -eq 255 ] || \
  if \
    grep -F -q -s 'I_MPI_JOB_TIMEOUT' "$2" "$3" || \
    grep -F -q -s 'APPLICATION TIMED OUT' "$2" "$3" || \
    grep -F -q -s MPIEXEC_TIMEOUT "$2" "$3" || \
    grep -F -q -s 'APPLICATION TERMINATED WITH THE EXIT STRING: job ending due to timeout' "$2" "$3" || \
    grep -q -s "Timeout after [0-9]* seconds. Terminating job" "$2" "$3"; then
    timed_out=1
    # If timed out, then ensure non-zero error code
    if [ "$cmd_res" -eq 0 ]; then
      cmd_res=1
    fi
  fi

  # Report errors
  comment=""
  if test "$cmd_res" == 0; then
    if "${verbose}"; then
      comment="${cmd}"
    fi
    petsc_report_tapoutput "" "$tlabel" "$comment"
    success=$(( success + 1 ))
  else
    if [ -n "$timed_out" ]; then
      comment="Exceeded timeout limit of $MPIEXEC_TIMEOUT s"
    else
      comment="Error code: ${cmd_res}"
    fi
    petsc_report_tapoutput "not" "$tlabel" "$comment"

    # Report errors in detail
    if [ -z "$timed_out" ]; then
      # We've had tests fail but stderr->stdout, as well as having
      # mpi_abort go to stderr which throws this test off.  Show both
      # with stdout first
      awk '{print "#\t" $0}' < "$2" | tee -a "${testlogerrfile}"
      # if statement is for diff tests
      if test "$2" != "$3"; then
        awk '{print "#\t" $0}' < "$3" | tee -a "${testlogerrfile}"
      fi
    fi
    failed=$(( failed + 1 ))
    failures="$failures $tlabel"
  fi
  total=$(( success + failed ))
  return $cmd_res
}

#######################################
# Write the summary counts for this script and optionally clean up.
# Arguments: $1 - directory under which counts/${label}.counts is written
# Globals:   label, total, success, failed, failures, todo, skip, cleanup,
#            rmfiles (read); ENDTIME, timing (written)
# Outputs:   the counts file; with cleanup=true the files recorded by
#            petsc_testrun are removed
#######################################
petsc_testend() {
  logfile=$1/counts/${label}.counts
  logdir=$(dirname "$logfile")
  if ! test -d "$logdir"; then
    mkdir -p "$logdir"
  fi
  if ! test -e "$logfile"; then
    touch "$logfile"
  fi
  # Values are passed as printf arguments so that a '%' in a label cannot be
  # misread as a format directive.
  printf 'total %s\n' "$total" > "$logfile"
  printf 'success %s\n' "$success" >> "$logfile"
  printf 'failed %s\n' "$failed" >> "$logfile"
  printf 'failures %s\n' "$failures" >> "$logfile"
  if test "${todo}" -gt 0; then
    printf 'todo %s\n' "$todo" >> "$logfile"
  fi
  if test "${skip}" -gt 0; then
    printf 'skip %s\n' "$skip" >> "$logfile"
  fi
  ENDTIME=$(date +%s)
  # From the user/sys lines of timing.out: largest single value and the sum.
  timing=$(touch timing.out && grep -E '(user|sys)' timing.out | awk '{if( sum1 == "" || $2 > sum1 ) { sum1=sprintf("%.2f",$2) } ; sum2 += sprintf("%.2f",$2)} END {printf "%.2f %.2f\n",sum1,sum2}')
  printf 'time %s\n' "$timing" >> "$logfile"
  if $cleanup; then
    echo "Cleaning up"
    # $rmfiles is a whitespace-separated list: splitting is intentional.
    # shellcheck disable=SC2086
    /bin/rm -f $rmfiles
  fi
}

#######################################
# Wrapper placed in front of the mpiexec command by the -U option.
# loops over the argument list to find the call to the test executable and
# insert the cuda memcheck command before it.
# Arguments: the full launch command line (launcher, its options, executable,
#            executable arguments)
# Globals:   executable, printcmd, PETSC_CUDAMEMCHECK_COMMAND,
#            PETSC_CUDAMEMCHECK_ARGS (read)
#######################################
petsc_mpiexec_cudamemcheck() {
  # first check if compute-sanitizer exists, since cuda-memcheck is deprecated from CUDA
  # 11-ish onwards
  if command -v compute-sanitizer &> /dev/null; then
    memcheck_cmd="${PETSC_CUDAMEMCHECK_COMMAND:-compute-sanitizer}"
    declare -a default_args_to_check=('--target-processes all' '--track-stream-ordered-races all')
  else
    memcheck_cmd="${PETSC_CUDAMEMCHECK_COMMAND:-cuda-memcheck}"
    declare -a default_args_to_check=('--flush-to-disk yes')
  fi
  if [[ -z ${PETSC_CUDAMEMCHECK_ARGS} ]]; then
    # if user has not set the memcheck args themselves loop over the predefined default
    # arguments and check if they can be used
    memcheck_args='--leak-check full --report-api-errors no '
    for option in "${default_args_to_check[@]}"; do
      # Word-splitting of the three expansions is intentional (each holds
      # several command-line words).
      # shellcheck disable=SC2086
      if ${memcheck_cmd} ${memcheck_args} ${option} &> /dev/null; then
        memcheck_args+="${option} "
      fi
    done
  else
    memcheck_args="${PETSC_CUDAMEMCHECK_ARGS}"
  fi
  pre_args=()
  # regex to detect where the test lives in the command line. This
  # marks the end of the options to mpiexec, and hence where we should insert the
  # cuda-memcheck command
  re="${executable}"
  for i in "$@"; do
    # first occurrence of the presence of petsc_arch is the executable,
    # except when we install MPI ourselves
    if [[ $i =~ ${re} ]]; then
      # found it, put cuda memcheck command in
      pre_args+=("${memcheck_cmd} ${memcheck_args}")
      break
    fi
    pre_args+=("$i")
    # consume the launcher word so that "$@" ends up starting at the executable
    shift
  done
  # run command, but filter out
  #  ===== CUDA-MEMCHECK or ==== COMPUTE-SANITIZER
  # and
  #  ===== ERROR SUMMARY: 0 errors
  # ${pre_args[@]} is unquoted on purpose: the memcheck element holds the
  # command AND its options and must be split into words.
  # shellcheck disable=SC2068,SC2086
  if ${printcmd}; then
    echo ${pre_args[@]} "$@"
  else
    ${pre_args[@]} "$@" \
      | grep -v 'CUDA-MEMCHECK' \
      | grep -v 'COMPUTE-SANITIZER' \
      | grep -v 'LEAK SUMMARY: 0 bytes leaked in 0 allocations' \
      | grep -v 'ERROR SUMMARY: 0 errors' || [[ $? == 1 ]]
  fi
  # last or is needed to suppress grep exiting with error code 1 if it doesn't find a
  # match
}

#######################################
# Wrapper placed in front of the mpiexec command by the -V option: inserts
# the valgrind command line in front of the first argument matching
# ${executable}.
# Arguments: the full launch command line
# Globals:   executable, printcmd, PETSC_DIR (read)
#######################################
petsc_mpiexec_valgrind() {
  valgrind_cmd="valgrind -q --tool=memcheck --leak-check=yes --num-callers=20 --track-origins=yes --keep-debuginfo=yes --suppressions=${PETSC_DIR}/share/petsc/suppressions/valgrind --error-exitcode=10"
  pre_args=()
  re="${executable}"
  for i in "$@"; do
    if [[ $i =~ ${re} ]]; then
      pre_args+=("${valgrind_cmd}")
      break
    fi
    pre_args+=("$i")
    shift
  done
  # ${pre_args[@]} is unquoted on purpose: the valgrind element must be split
  # into the command and its options.
  # shellcheck disable=SC2068,SC2086
  if ${printcmd}; then
    echo ${pre_args[@]} "$@"
  else
    ${pre_args[@]} "$@"
  fi
}
export LC_ALL=C