Skip to content

Commit 4319927

Browse files
pi-anl and dpgeorge
authored and committed
tests/run-tests.py: Ignore known-flaky test failures.
Reclassify failures of tests listed in flaky_tests_to_ignore as "ignored" instead of retrying them. Ignored tests still run and their output is reported, but they don't affect the exit code. The ci.sh --exclude lists for these tests are removed so they run normally.

Signed-off-by: Andrew Leech <andrew.leech@planet-innovation.com>
1 parent ad054fc commit 4319927

3 files changed

Lines changed: 48 additions & 14 deletions

File tree

tests/run-tests.py

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
get_test_instance,
3232
prepare_script_for_target,
3333
create_test_report,
34+
FLAKY_REASON_PREFIX,
3435
)
3536

3637
RV32_ARCH_FLAGS = {
@@ -193,6 +194,23 @@
193194
),
194195
}
195196

197+
# Tests with known intermittent failures. These tests still run, but failures
198+
# are reclassified as "ignored" instead of "fail" so they don't affect the CI
199+
# exit code. Paths are relative to the tests/ directory (must match test_file
200+
# format used by run_one_test, which normalises backslashes to forward slashes).
201+
#
202+
# Values are (reason, platforms) tuples where platforms is None (all platforms)
203+
# or a tuple of sys.platform strings to restrict ignoring to those platforms.
204+
flaky_tests_to_ignore = {
205+
"thread/thread_gc1.py": ("GC race condition", None),
206+
"thread/stress_schedule.py": ("intermittent crash under QEMU", None),
207+
"thread/stress_recurse.py": ("stack overflow under emulation", None),
208+
"thread/stress_heap.py": ("flaky on macOS", ("darwin",)),
209+
"cmdline/repl_lock.py": ("REPL timing under QEMU", None),
210+
"cmdline/repl_cont.py": ("REPL escaping on macOS", ("darwin",)),
211+
"extmod/time_time_ns.py": ("CI runner clock precision", None),
212+
}
213+
196214
# These tests don't test float explicitly but rather use it to perform the test.
197215
tests_requiring_float = (
198216
"extmod/asyncio_basic.py",
@@ -1062,6 +1080,16 @@ def run_one_test(test_file):
10621080
print(line)
10631081
sys.exit(2)
10641082

1083+
# Reclassify known-flaky test failures as ignored.
1084+
# Safe to mutate: thread pool has joined.
1085+
results = test_results.value
1086+
for i, r in enumerate(results):
1087+
if r[1] == "fail":
1088+
reason, platforms = flaky_tests_to_ignore.get(r[0], (None, None))
1089+
if reason is not None:
1090+
if platforms is None or sys.platform in platforms:
1091+
results[i] = (r[0], "ignored", "{}: {}".format(FLAKY_REASON_PREFIX, reason))
1092+
10651093
# Return test results.
10661094
return test_results.value, testcase_count.value
10671095

tests/test_utils.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@ def base_path(*p):
2222
sys.path.append(base_path("../tools"))
2323
import pyboard
2424

25+
# Prefix used by run-tests.py to tag known-flaky test results.
26+
FLAKY_REASON_PREFIX = "flaky"
27+
2528
# File with the test results.
2629
_RESULTS_FILE = "_results.json"
2730

@@ -313,11 +316,12 @@ def create_test_report(args, test_results, testcase_count=None):
313316
r for r in test_results if r[1] == "skip" and r[2] == "too large"
314317
)
315318
failed_tests = list(r for r in test_results if r[1] == "fail")
319+
ignored_tests = list(r for r in test_results if r[1] == "ignored")
316320
dry_run = getattr(args, "dry_run", False)
317321
if dry_run:
318322
found_tests = list(r for r in test_results if r[1] == "found")
319323

320-
num_tests_performed = len(passed_tests) + len(failed_tests)
324+
num_tests_performed = len(passed_tests) + len(failed_tests) + len(ignored_tests)
321325

322326
if dry_run:
323327
print("{} tests found".format(len(found_tests)))
@@ -329,6 +333,14 @@ def create_test_report(args, test_results, testcase_count=None):
329333

330334
print("{} tests passed".format(len(passed_tests)))
331335

336+
if len(ignored_tests) > 0:
337+
print(
338+
"{} tests had known-flaky failures (ignored): {}".format(
339+
len(ignored_tests),
340+
" ".join("{} [{}]".format(t[0], t[2]) for t in ignored_tests),
341+
)
342+
)
343+
332344
if len(skipped_tests) > 0:
333345
print(
334346
"{} tests skipped: {}".format(
@@ -365,6 +377,8 @@ def to_json(obj):
365377
"results": list(test for test in test_results),
366378
# A list of failed tests. This is deprecated, use the "results" above instead.
367379
"failed_tests": [test[0] for test in failed_tests],
380+
# A list of known-flaky tests whose failures were ignored.
381+
"ignored_tests": [test[0] for test in ignored_tests],
368382
},
369383
f,
370384
default=to_json,

tools/ci.sh

Lines changed: 5 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -905,9 +905,7 @@ function ci_unix_macos_run_tests {
905905
# Issues with macOS tests:
906906
# - float_parse and float_parse_doubleprec parse/print floats out by a few mantissa bits
907907
# - ffi_callback crashes for an unknown reason
908-
# - thread/stress_heap.py is flaky
909-
# - thread/thread_gc1.py is flaky
910-
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-standard/micropython ./run-tests.py --exclude '(float_parse|float_parse_doubleprec|ffi_callback|thread/stress_heap|thread/thread_gc1).py')
908+
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-standard/micropython ./run-tests.py --exclude '(float_parse|float_parse_doubleprec|ffi_callback).py')
911909
}
912910

913911
function ci_unix_qemu_mips_setup {
@@ -927,10 +925,8 @@ function ci_unix_qemu_mips_build {
927925
function ci_unix_qemu_mips_run_tests {
928926
# Issues with MIPS tests:
929927
# - thread/stress_aes.py takes around 90 seconds
930-
# - thread/stress_recurse.py is flaky
931-
# - thread/thread_gc1.py is flaky
932928
file ./ports/unix/build-coverage/micropython
933-
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython MICROPY_TEST_TIMEOUT=180 ./run-tests.py --exclude 'thread/stress_recurse.py|thread/thread_gc1.py')
929+
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython MICROPY_TEST_TIMEOUT=180 ./run-tests.py)
934930
}
935931

936932
function ci_unix_qemu_arm_setup {
@@ -950,10 +946,8 @@ function ci_unix_qemu_arm_build {
950946
function ci_unix_qemu_arm_run_tests {
951947
# Issues with ARM tests:
952948
# - thread/stress_aes.py takes around 70 seconds
953-
# - thread/stress_recurse.py is flaky
954-
# - thread/thread_gc1.py is flaky
955949
file ./ports/unix/build-coverage/micropython
956-
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython MICROPY_TEST_TIMEOUT=90 ./run-tests.py --exclude 'thread/stress_recurse.py|thread/thread_gc1.py')
950+
(cd tests && MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython MICROPY_TEST_TIMEOUT=90 ./run-tests.py)
957951
}
958952

959953
function ci_unix_qemu_riscv64_setup {
@@ -976,12 +970,10 @@ function ci_unix_qemu_riscv64_build {
976970

977971
function ci_unix_qemu_riscv64_run_tests {
978972
# Issues with RISCV-64 tests:
979-
# - thread/stress_aes.py takes around 180 seconds
980-
# - thread/stress_recurse.py is flaky
981-
# - thread/thread_gc1.py is flaky
973+
# - thread/stress_aes.py takes around 180 seconds, so exclude it to keep execution time down
982974
file ./ports/unix/build-coverage/micropython
983975
pushd tests
984-
MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython MICROPY_TEST_TIMEOUT=200 ./run-tests.py --exclude 'thread/stress_recurse.py|thread/thread_gc1.py'
976+
MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython ./run-tests.py --exclude 'thread/stress_aes.py'
985977
MICROPY_MICROPYTHON=../ports/unix/build-coverage/micropython ./run-natmodtests.py extmod/btree*.py extmod/deflate*.py extmod/framebuf*.py extmod/heapq*.py extmod/random_basic*.py extmod/re*.py
986978
popd
987979
}

0 commit comments

Comments
 (0)