|
8 | 8 | import subprocess |
9 | 9 | import sys |
10 | 10 | import uuid |
| 11 | +import xml.etree.ElementTree as ET |
11 | 12 | from datetime import datetime |
12 | 13 | from pathlib import Path |
13 | 14 |
|
|
24 | 25 | _PYTEST_VALUE_FLAGS = {"-n", "-k", "-m", "-p", "--tb", "--junitxml", "--rootdir"} |
25 | 26 |
|
26 | 27 |
|
def _junit_xml_indicates_pass(results_dir):
    """Return True if the pytest junit XML under `results_dir` reports no failures/errors.

    Used to distinguish a real CI failure from the docker 18.09
    container-teardown `SIGKILL` (exit code 137) that occurs on this host
    after a child process exits successfully — bash returns 0 from inside
    the container, but the docker daemon reports 137 due to a race in its
    `--rm` cleanup path. The junit XML is written by pytest before that
    teardown and reliably captures the real outcome of the test stage.

    Every `test-results.xml` found recursively under `results_dir` is
    inspected. The verdict is never taken from the first report alone,
    because a job may leave several reports and a failure recorded in any
    one of them must veto the "treat 137 as success" fallback.

    Args:
        results_dir: Directory (str or path-like) searched recursively for
            files named `test-results.xml`.

    Returns:
        True only when at least one report was parsed and no `testsuite`
        in any parsed report has a non-zero `failures` or `errors` count.
        False when no usable report exists, when any suite records a
        failure or error, or when a count attribute is not an integer.
    """
    found_report = False

    for junit in Path(results_dir).rglob("test-results.xml"):
        try:
            root = ET.parse(junit).getroot()
        except (ET.ParseError, OSError):
            # Malformed or unreadable report: it carries no verdict, so it
            # is skipped. It can never make the job pass on its own because
            # `found_report` stays False unless a real report is parsed.
            continue

        # pytest may emit either a <testsuites> wrapper or a bare <testsuite>.
        suites = root.findall("testsuite") if root.tag == "testsuites" else [root]
        if not suites:
            continue

        for suite in suites:
            try:
                failed = int(suite.get("failures", 0)) > 0
                if failed or int(suite.get("errors", 0)) > 0:
                    return False
            except ValueError:
                # A non-numeric count cannot be trusted; be conservative.
                return False

        # This report is clean, but keep scanning: another report may not be.
        found_report = True

    return found_report
| 62 | + |
| 63 | + |
27 | 64 | def apply_test_override(run_cmd, test_path): |
28 | 65 | """Replace positional test path(s) in a pytest stage command. |
29 | 66 |
|
@@ -437,8 +474,23 @@ def main(): |
437 | 474 | pool.release(allocated_ids) |
438 | 475 |
|
439 | 476 | if returncode != 0: |
440 | | - print(f"job {job_name} failed (exit code {returncode})", file=sys.stderr) |
441 | | - failed += 1 |
| 477 | + # Docker 18.09 on this host occasionally SIGKILLs containers |
| 478 | + # during `--rm` cleanup after the inner process already exited |
| 479 | + # cleanly, producing exit code 137. Fall back to the pytest |
| 480 | + # junit XML to recover the real outcome in that case. |
| 481 | + if returncode == 137 and _junit_xml_indicates_pass(results_dir): |
| 482 | + print( |
| 483 | + f"[warn] job {job_name}: container exited with 137 " |
| 484 | + f"(likely docker teardown SIGKILL after clean pytest); " |
| 485 | + f"junit XML reports no failures — treating as success", |
| 486 | + file=sys.stderr, |
| 487 | + ) |
| 488 | + else: |
| 489 | + print( |
| 490 | + f"job {job_name} failed (exit code {returncode})", |
| 491 | + file=sys.stderr, |
| 492 | + ) |
| 493 | + failed += 1 |
442 | 494 |
|
443 | 495 | sys.exit(1 if failed else 0) |
444 | 496 |
|
|
0 commit comments