diff options
Diffstat (limited to 'tests/test-runner/bin/test-runner.py.in')
| -rwxr-xr-x | tests/test-runner/bin/test-runner.py.in | 285 |
1 files changed, 205 insertions, 80 deletions
diff --git a/tests/test-runner/bin/test-runner.py.in b/tests/test-runner/bin/test-runner.py.in index cb453b266f3c..6688b6c4beb6 100755 --- a/tests/test-runner/bin/test-runner.py.in +++ b/tests/test-runner/bin/test-runner.py.in @@ -1,4 +1,5 @@ #!/usr/bin/env @PYTHON_SHEBANG@ +# SPDX-License-Identifier: CDDL-1.0 # # This file and its contents are supplied under the terms of the @@ -14,6 +15,7 @@ # # Copyright (c) 2012, 2018 by Delphix. All rights reserved. # Copyright (c) 2019 Datto Inc. +# Copyright (c) 2025, Klara, Inc. # # This script must remain compatible with Python 3.6+. # @@ -23,6 +25,7 @@ import sys import ctypes import re import configparser +import traceback from datetime import datetime from optparse import OptionParser @@ -32,6 +35,7 @@ from select import select from subprocess import PIPE from subprocess import Popen from subprocess import check_output +from subprocess import run from threading import Timer from time import time, CLOCK_MONOTONIC from os.path import exists @@ -47,25 +51,25 @@ LOG_OUT = 'LOG_OUT' LOG_ERR = 'LOG_ERR' LOG_FILE_OBJ = None +try: + from time import monotonic as monotonic_time +except ImportError: + class timespec(ctypes.Structure): + _fields_ = [ + ('tv_sec', ctypes.c_long), + ('tv_nsec', ctypes.c_long) + ] -class timespec(ctypes.Structure): - _fields_ = [ - ('tv_sec', ctypes.c_long), - ('tv_nsec', ctypes.c_long) - ] - - -librt = ctypes.CDLL('librt.so.1', use_errno=True) -clock_gettime = librt.clock_gettime -clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)] - + librt = ctypes.CDLL('librt.so.1', use_errno=True) + clock_gettime = librt.clock_gettime + clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)] -def monotonic_time(): - t = timespec() - if clock_gettime(CLOCK_MONOTONIC, ctypes.pointer(t)) != 0: - errno_ = ctypes.get_errno() - raise OSError(errno_, os.strerror(errno_)) - return t.tv_sec + t.tv_nsec * 1e-9 + def monotonic_time(): + t = timespec() + if clock_gettime(CLOCK_MONOTONIC, ctypes.pointer(t)) != 0: + errno_ = ctypes.get_errno() + raise OSError(errno_, os.strerror(errno_)) + return t.tv_sec + t.tv_nsec * 1e-9 class Result(object): @@ -111,10 +115,11 @@ class Result(object): class Output(object): """ This class is a slightly modified version of the 'Stream' class found - here: http://goo.gl/aSGfv + here: https://stackoverflow.com/q/4984549/ """ - def __init__(self, stream): + def __init__(self, stream, debug=False): self.stream = stream + self.debug = debug self._buf = b'' self.lines = [] @@ -140,6 +145,8 @@ class Output(object): buf = os.read(fd, 4096) if not buf: return None + if self.debug: + os.write(sys.stderr.fileno(), buf) if b'\n' not in buf: self._buf += buf return [] @@ -181,7 +188,64 @@ Timeout: %d User: %s ''' % (self.pathname, self.identifier, self.outputdir, self.timeout, self.user) - def kill_cmd(self, proc, keyboard_interrupt=False): + def kill_cmd(self, proc, options, kmemleak, keyboard_interrupt=False): + + """ + We're about to kill a command due to a timeout. + If we're running with the -O option, then dump debug info about the + process with the highest CPU usage to /dev/kmsg (Linux only). This can + help debug the timeout. + + Debug info includes: + - 30 lines from 'top' + - /proc/<PID>/stack output of process with highest CPU usage + - Last lines strace-ing process with highest CPU usage + """ + if exists("/dev/kmsg"): + c = """ +TOP_OUT="$(COLUMNS=160 top -b -n 1 | head -n 30)" +read -r PID CMD <<< $(echo "$TOP_OUT" | /usr/bin/awk \ +"/COMMAND/{ + print_next=1 + next +} +{ + if (print_next == 1) { + print \\$1\\" \\"\\$12 + exit + } +}") +echo "##### ZTS timeout debug #####" +echo "----- top -----" +echo "$TOP_OUT" +echo "----- /proc/$PID/stack ($CMD)) -----" +cat /proc/$PID/stack +echo "----- strace ($CMD) -----" +TMPFILE="$(mktemp --suffix=ZTS)" +/usr/bin/strace -k --stack-traces -p $PID &> "$TMPFILE" & +sleep 0.1 +killall strace +tail -n 30 $TMPFILE +rm "$TMPFILE" +echo "##### /proc/sysrq-trigger stack #####" +""" + c = "sudo bash -c '" + c + "'" + data = run(c, capture_output=True, shell=True, text=True) + out = data.stdout + try: + kp = Popen([SUDO, "sh", "-c", + "echo '" + out + "' > /dev/kmsg"]) + kp.wait() + + """ + Trigger kernel stack traces + """ + kp = Popen([SUDO, "sh", "-c", + "echo l > /proc/sysrq-trigger"]) + kp.wait() + except Exception: + pass + """ Kill a running command due to timeout, or ^C from the keyboard. If sudo is required, this user was verified previously. @@ -211,7 +275,7 @@ User: %s if int(self.timeout) > runtime: self.killed = False self.reran = False - self.run(False) + self.run(options, dryrun=False, kmemleak=kmemleak) self.reran = True def update_cmd_privs(self, cmd, user): @@ -235,17 +299,25 @@ User: %s if os.path.isfile(cmd+'.sh') and os.access(cmd+'.sh', os.X_OK): cmd += '.sh' - ret = '%s -E -u %s %s' % (SUDO, user, cmd) + # glibc (at least) will not pass TMPDIR through to setuid programs. + # if set, arrange for it to be reset before running the target cmd + tmpdir = os.getenv('TMPDIR') + if tmpdir: + tmpdirarg = 'env TMPDIR=%s' % tmpdir + else: + tmpdirarg = '' + + ret = '%s -E -u %s %s %s' % (SUDO, user, tmpdirarg, cmd) return ret.split(' ') - def collect_output(self, proc): + def collect_output(self, proc, debug=False): """ Read from stdout/stderr as data becomes available, until the process is no longer running. Return the lines from the stdout and stderr Output objects. """ - out = Output(proc.stdout) - err = Output(proc.stderr) + out = Output(proc.stdout, debug) + err = Output(proc.stderr, debug) res = [] while proc.returncode is None: proc.poll() @@ -257,15 +329,19 @@ User: %s return out.lines, err.lines - def run(self, dryrun, kmemleak, kmsg): + def run(self, options, dryrun=None, kmemleak=None): """ This is the main function that runs each individual test. Determine whether or not the command requires sudo, and modify it if needed. Run the command, and update the result object. """ + if dryrun is None: + dryrun = options.dryrun if dryrun is True: print(self) return + if kmemleak is None: + kmemleak = options.kmemleak privcmd = self.update_cmd_privs(self.pathname, self.user) try: @@ -280,7 +356,7 @@ User: %s Log each test we run to /dev/kmsg (on Linux), so if there's a kernel warning we'll be able to match it up to a particular test. """ - if kmsg is True and exists("/dev/kmsg"): + if options.kmsg is True and exists("/dev/kmsg"): try: kp = Popen([SUDO, "sh", "-c", f"echo ZTS run {self.pathname} > /dev/kmsg"]) @@ -288,6 +364,18 @@ User: %s except Exception: pass + """ + Log each test we run to /dev/ttyu0 (on FreeBSD), so if there's a kernel + warning we'll be able to match it up to a particular test. + """ + if options.kmsg is True and exists("/dev/ttyu0"): + try: + kp = Popen([SUDO, "sh", "-c", + f"echo ZTS run {self.pathname} > /dev/ttyu0"]) + kp.wait() + except Exception: + pass + self.result.starttime = monotonic_time() if kmemleak: @@ -297,12 +385,17 @@ User: %s proc = Popen(privcmd, stdout=PIPE, stderr=PIPE) # Allow a special timeout value of 0 to mean infinity if int(self.timeout) == 0: - self.timeout = sys.maxsize - t = Timer(int(self.timeout), self.kill_cmd, [proc]) + self.timeout = sys.maxsize / (10 ** 9) + t = Timer( + int(self.timeout), self.kill_cmd, [proc, options, kmemleak] + ) try: t.start() - self.result.stdout, self.result.stderr = self.collect_output(proc) + + out, err = self.collect_output(proc, options.debug) + self.result.stdout = out + self.result.stderr = err if kmemleak: cmd = f'{SUDO} sh -c "echo scan > {KMEMLEAK_FILE}"' @@ -310,7 +403,7 @@ User: %s cmd = f'{SUDO} cat {KMEMLEAK_FILE}' self.result.kmemleak = check_output(cmd, shell=True) except KeyboardInterrupt: - self.kill_cmd(proc, True) + self.kill_cmd(proc, options, kmemleak, True) fail('\nRun terminated at user request.') finally: t.cancel() @@ -339,6 +432,8 @@ User: %s stdout/stderr/merged in its own file. """ + timeprefix = datetime.now().strftime('[%FT%T.%f] ') + logname = getpwuid(os.getuid()).pw_name rer = '' if self.reran is True: @@ -350,7 +445,7 @@ User: %s msga = 'Test: %s%s ' % (self.pathname, user) msgb = '[%s] [%s]%s\n' % (self.result.runtime, self.result.result, rer) pad = ' ' * (80 - (len(msga) + len(msgb))) - result_line = msga + pad + msgb + result_line = timeprefix + msga + pad + msgb # The result line is always written to the log file. If -q was # specified only failures are written to the console, otherwise @@ -450,7 +545,7 @@ Tags: %s return True - def run(self, options): + def run(self, options, dryrun=None, kmemleak=None): """ Create Cmd instances for the pre/post/failsafe scripts. If the pre script doesn't pass, skip this Test. Run the post script regardless. @@ -472,14 +567,14 @@ Tags: %s cont = True if len(pretest.pathname): - pretest.run(options.dryrun, False, options.kmsg) + pretest.run(options, kmemleak=False) cont = pretest.result.result == 'PASS' pretest.log(options) if cont: - test.run(options.dryrun, options.kmemleak, options.kmsg) + test.run(options, kmemleak=kmemleak) if test.result.result == 'KILLED' and len(failsafe.pathname): - failsafe.run(options.dryrun, False, options.kmsg) + failsafe.run(options, kmemleak=False) failsafe.log(options, suppress_console=True) else: test.skip() @@ -487,7 +582,7 @@ Tags: %s test.log(options) if len(posttest.pathname): - posttest.run(options.dryrun, False, options.kmsg) + posttest.run(options, kmemleak=False) posttest.log(options) @@ -571,7 +666,7 @@ Tags: %s return len(self.tests) != 0 - def run(self, options): + def run(self, options, dryrun=None, kmemleak=None): """ Create Cmd instances for the pre/post/failsafe scripts. If the pre script doesn't pass, skip all the tests in this TestGroup. Run the @@ -590,7 +685,7 @@ Tags: %s cont = True if len(pretest.pathname): - pretest.run(options.dryrun, False, options.kmsg) + pretest.run(options, dryrun=dryrun, kmemleak=False) cont = pretest.result.result == 'PASS' pretest.log(options) @@ -603,9 +698,9 @@ Tags: %s failsafe = Cmd(self.failsafe, outputdir=odir, timeout=self.timeout, user=self.failsafe_user, identifier=self.identifier) if cont: - test.run(options.dryrun, options.kmemleak, options.kmsg) + test.run(options, dryrun=dryrun, kmemleak=kmemleak) if test.result.result == 'KILLED' and len(failsafe.pathname): - failsafe.run(options.dryrun, False, options.kmsg) + failsafe.run(options, dryrun=dryrun, kmemleak=False) failsafe.log(options, suppress_console=True) else: test.skip() @@ -613,12 +708,12 @@ Tags: %s test.log(options) if len(posttest.pathname): - posttest.run(options.dryrun, False, options.kmsg) + posttest.run(options, dryrun=dryrun, kmemleak=False) posttest.log(options) class TestRun(object): - props = ['quiet', 'outputdir'] + props = ['quiet', 'outputdir', 'debug'] def __init__(self, options): self.tests = {} @@ -638,7 +733,8 @@ class TestRun(object): ('post_user', ''), ('failsafe', ''), ('failsafe_user', ''), - ('tags', []) + ('tags', []), + ('debug', False) ] def __str__(self): @@ -721,8 +817,11 @@ class TestRun(object): for opt in TestRun.props: if config.has_option('DEFAULT', opt): - setattr(self, opt, config.get('DEFAULT', opt)) - self.outputdir = os.path.join(self.outputdir, self.timestamp) + if opt == 'outputdir': + outputdir = config.get('DEFAULT', opt) + setattr(self, opt, os.path.join(outputdir, self.timestamp)) + else: + setattr(self, opt, config.get('DEFAULT', opt)) testdir = options.testdir @@ -750,6 +849,11 @@ class TestRun(object): failsafe = config.get(sect, prop) setattr(testgroup, prop, os.path.join(testdir, failsafe)) + elif prop == 'outputdir': + outputdir = config.get(sect, prop) + setattr(self, opt, + os.path.join(outputdir, + self.timestamp)) else: setattr(testgroup, prop, config.get(sect, prop)) @@ -768,6 +872,11 @@ class TestRun(object): failsafe = config.get(sect, prop) setattr(test, prop, os.path.join(testdir, failsafe)) + elif prop == 'outputdir': + outputdir = config.get(sect, prop) + setattr(self, opt, + os.path.join(outputdir, + self.timestamp)) else: setattr(test, prop, config.get(sect, prop)) @@ -811,9 +920,10 @@ class TestRun(object): def complete_outputdirs(self): """ - Collect all the pathnames for Tests, and TestGroups. Work - backwards one pathname component at a time, to create a unique - directory name in which to deposit test output. Tests will be able + Collect all the pathnames for Tests, and TestGroups. Strip off all + common leading path components, and append what remains to the top + "output" dir, to create a tree of output directories that match + the test and group names in structure. Tests will be able to write output files directly in the newly modified outputdir. TestGroups will be able to create one subdirectory per test in the outputdir, and are guaranteed uniqueness because a group can only @@ -822,24 +932,30 @@ class TestRun(object): question for their output. Failsafe scripts will create a directory rooted at the outputdir of each Test for their output. """ - done = False - components = 0 - tmp_dict = dict(list(self.tests.items()) + + + alltests = dict(list(self.tests.items()) + list(self.testgroups.items())) - total = len(tmp_dict) - base = self.outputdir - - while not done: - paths = [] - components -= 1 - for testfile in list(tmp_dict.keys()): - uniq = '/'.join(testfile.split('/')[components:]).lstrip('/') - if uniq not in paths: - paths.append(uniq) - tmp_dict[testfile].outputdir = os.path.join(base, uniq) - else: - break - done = total == len(paths) + base = os.path.join(self.outputdir, 'output') + + seen = [] + + for path in list(alltests.keys()): + frag = path.split('/') + for i in range(0, len(frag)): + if len(seen) == i: + seen.append({}) + seen[i][frag[i]] = 1 + + cut = 0 + for i in range(0, len(seen)): + if len(list(seen[i].keys())) == 1: + cut += 1 + else: + break + + for path in list(alltests.keys()): + uniq = path.split('/', cut)[-1] + alltests[path].outputdir = os.path.join(base, uniq) def setup_logging(self, options): """ @@ -1023,7 +1139,7 @@ def filter_tests(testrun, options): testrun.filter(failed) -def fail(retstr, ret=1): +def fail(retstr, ret=255): print('%s: %s' % (sys.argv[0], retstr)) exit(ret) @@ -1061,6 +1177,8 @@ def parse_args(): help='Specify tests to run via config files.') parser.add_option('-d', action='store_true', default=False, dest='dryrun', help='Dry run. Print tests, but take no other action.') + parser.add_option('-D', action='store_true', default=False, dest='debug', + help='Write all test output to stdout as it arrives.') parser.add_option('-l', action='callback', callback=options_cb, default=None, dest='logfile', metavar='logfile', type='string', @@ -1070,6 +1188,9 @@ def parse_args(): parser.add_option('-o', action='callback', callback=options_cb, default=BASEDIR, dest='outputdir', type='string', metavar='outputdir', help='Specify an output directory.') + parser.add_option('-O', action='store_true', default=False, + dest='timeout_debug', + help='Dump debugging info to /dev/kmsg on test timeout') parser.add_option('-i', action='callback', callback=options_cb, default=TESTDIR, dest='testdir', type='string', metavar='testdir', help='Specify a test directory.') @@ -1127,23 +1248,27 @@ def parse_args(): def main(): options = parse_args() - testrun = TestRun(options) + try: + testrun = TestRun(options) - if options.runfiles: - testrun.read(options) - else: - find_tests(testrun, options) + if options.runfiles: + testrun.read(options) + else: + find_tests(testrun, options) + + if options.logfile: + filter_tests(testrun, options) - if options.logfile: - filter_tests(testrun, options) + if options.template: + testrun.write(options) + exit(0) - if options.template: - testrun.write(options) - exit(0) + testrun.complete_outputdirs() + testrun.run(options) + exit(testrun.summary()) - testrun.complete_outputdirs() - testrun.run(options) - exit(testrun.summary()) + except Exception: + fail("Uncaught exception in test runner:\n" + traceback.format_exc()) if __name__ == '__main__': |
