Skip to content

Commit b19e1bd

Browse files
committed
perf: replace backtracking regexes with character-class patterns in parse_test_output
Replace lazy `.*?` quantifiers in matches_re_start/matches_re_end with negated character classes (`[^:]`, `[^#]`, `[^.:]`) to eliminate quadratic backtracking. Replace per-line regex search for the pytest FAILURES header with a simple `"= FAILURES =" in line` string check. Add tests for the regex patterns and failure header detection.
1 parent b058dee commit b19e1bd

2 files changed

Lines changed: 208 additions & 4 deletions

File tree

codeflash/verification/parse_test_output.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,24 @@ def parse_func(file_path: Path) -> XMLParser:
4747
return parse(file_path, xml_parser)
4848

4949

50-
matches_re_start = re.compile(r"!\$######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######\$!\n")
51-
matches_re_end = re.compile(r"!######(.*?):(.*?)([^\.:]*?):(.*?):(.*?):(.*?)######!")
50+
# Start-of-test marker, emitted on a single line of captured stdout:
#   !$######<module>:<Class.>*<test_fn>:<target_fn>:<loop_index>:<iteration_id>######$!\n
# Each field is a negated character class rather than a lazy `.*?`, so every
# capture stops at its delimiter without backtracking. "\n" is excluded from
# every class too: unlike ".", a negated class matches newlines, which would
# let a truncated marker prefix swallow the following lines of output.
matches_re_start = re.compile(
    r"!\$######([^:\n]*)"  # group 1: module path
    r":((?:[^:.\n]*\.)*)"  # group 2: class prefix with trailing dot, or empty
    r"([^.:\n]*)"  # group 3: test function name
    r":([^:\n]*)"  # group 4: function being tested
    r":([^:\n]*)"  # group 5: loop index
    r":([^#\n]*)"  # group 6: iteration id
    r"######\$!\n"
)
59+
# End-of-test marker, emitted on a single line of captured stdout:
#   !######<module>:<Class.>*<test_fn>:<target_fn>:<loop_index>:<iteration_id>[:<runtime>]######!
# Group 6 deliberately allows ":" so an optional ":<runtime>" suffix stays
# attached to the iteration id. "\n" is excluded from every class: unlike ".",
# a negated class matches newlines, which would let a truncated marker prefix
# swallow the following lines of output.
matches_re_end = re.compile(
    r"!######([^:\n]*)"  # group 1: module path
    r":((?:[^:.\n]*\.)*)"  # group 2: class prefix with trailing dot, or empty
    r"([^.:\n]*)"  # group 3: test function name
    r":([^:\n]*)"  # group 4: function being tested
    r":([^:\n]*)"  # group 5: loop index
    r":([^#\n]*)"  # group 6: iteration_id or iteration_id:runtime
    r"######!"
)
5268

5369

5470
# Five-field start marker: four colon-free fields (each may be empty) followed
# by a non-empty fifth field, wrapped in "!$######" ... "######$!". Unlike
# matches_re_start, this pattern does not require a trailing newline.
start_pattern = re.compile(r"!\$######([^:]*):([^:]*):([^:]*):([^:]*):([^:]+)######\$!")
@@ -893,7 +909,6 @@ def merge_test_results(
893909
return merged_test_results
894910

895911

896-
FAILURES_HEADER_RE = re.compile(r"=+ FAILURES =+")
897912
# Matches text of the form "___ <name> ___" that runs to the end of the string:
# three or more underscores on each side, with the name captured in group 1 and
# the whitespace around it left out of the capture
# (e.g. "_______ test_foo _______" -> "test_foo").
TEST_HEADER_RE = re.compile(r"_{3,}\s*(.*?)\s*_{3,}$")
898913

899914

@@ -903,7 +918,7 @@ def parse_test_failures_from_stdout(stdout: str) -> dict[str, str]:
903918
start = end = None
904919

905920
for i, line in enumerate(lines):
906-
if FAILURES_HEADER_RE.search(line.strip()):
921+
if "= FAILURES =" in line:
907922
start = i
908923
break
909924

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
"""Tests for the regex patterns and string matching in parse_test_output.py."""
2+
3+
from codeflash.verification.parse_test_output import (
4+
matches_re_end,
5+
matches_re_start,
6+
parse_test_failures_from_stdout,
7+
)
8+
9+
10+
# --- matches_re_start tests ---
11+
12+
13+
class TestMatchesReStart:
    """Check how ``matches_re_start`` splits a start marker into its six groups."""

    def test_simple_no_class(self):
        marker = "!$######tests.test_foo:test_bar:target_func:1:abc######$!\n"
        found = matches_re_start.search(marker)
        assert found is not None
        expected = ("tests.test_foo", "", "test_bar", "target_func", "1", "abc")
        assert found.groups() == expected

    def test_with_class(self):
        marker = "!$######tests.test_foo:MyClass.test_bar:target_func:1:abc######$!\n"
        found = matches_re_start.search(marker)
        assert found is not None
        expected = ("tests.test_foo", "MyClass.", "test_bar", "target_func", "1", "abc")
        assert found.groups() == expected

    def test_nested_class(self):
        marker = "!$######a.b.c:A.B.test_x:func:3:id123######$!\n"
        found = matches_re_start.search(marker)
        assert found is not None
        expected = ("a.b.c", "A.B.", "test_x", "func", "3", "id123")
        assert found.groups() == expected

    def test_empty_class_and_function(self):
        marker = "!$######mod::func:0:iter######$!\n"
        found = matches_re_start.search(marker)
        assert found is not None
        expected = ("mod", "", "", "func", "0", "iter")
        assert found.groups() == expected

    def test_embedded_in_stdout(self):
        # The marker is surrounded by unrelated output lines.
        text = "some output\n!$######mod:test_fn:f:1:x######$!\nmore output\n"
        found = matches_re_start.search(text)
        assert found is not None
        expected = ("mod", "", "test_fn", "f", "1", "x")
        assert found.groups() == expected

    def test_multiple_matches(self):
        text = (
            "!$######m1:C1.fn1:t1:1:a######$!\n"
            "!$######m2:fn2:t2:2:b######$!\n"
        )
        captured = [hit.groups() for hit in matches_re_start.finditer(text)]
        assert len(captured) == 2
        assert captured[0] == ("m1", "C1.", "fn1", "t1", "1", "a")
        assert captured[1] == ("m2", "", "fn2", "t2", "2", "b")

    def test_no_match_without_newline(self):
        # The start pattern requires the newline that follows the marker.
        marker = "!$######mod:test_fn:f:1:x######$!"
        found = matches_re_start.search(marker)
        assert found is None

    def test_dots_in_module_path(self):
        marker = "!$######a.b.c.d.e:test_fn:f:1:x######$!\n"
        found = matches_re_start.search(marker)
        assert found is not None
        module_path = found.group(1)
        assert module_path == "a.b.c.d.e"
64+
65+
66+
# --- matches_re_end tests ---
67+
68+
69+
class TestMatchesReEnd:
    """Check how ``matches_re_end`` splits an end marker, with or without a runtime suffix."""

    def test_simple_no_class_with_runtime(self):
        marker = "!######tests.test_foo:test_bar:target_func:1:abc:12345######!"
        found = matches_re_end.search(marker)
        assert found is not None
        expected = ("tests.test_foo", "", "test_bar", "target_func", "1", "abc:12345")
        assert found.groups() == expected

    def test_with_class_no_runtime(self):
        marker = "!######tests.test_foo:MyClass.test_bar:target_func:1:abc######!"
        found = matches_re_end.search(marker)
        assert found is not None
        expected = ("tests.test_foo", "MyClass.", "test_bar", "target_func", "1", "abc")
        assert found.groups() == expected

    def test_nested_class_with_runtime(self):
        marker = "!######mod:A.B.test_x:func:3:id123:99999######!"
        found = matches_re_end.search(marker)
        assert found is not None
        expected = ("mod", "A.B.", "test_x", "func", "3", "id123:99999")
        assert found.groups() == expected

    def test_runtime_colon_preserved_in_group6(self):
        """The last group keeps 'iteration_id:runtime' together, colon included."""
        marker = "!######m:fn:f:1:iter42:98765######!"
        found = matches_re_end.search(marker)
        assert found is not None
        last_group = found.group(6)
        assert last_group == "iter42:98765"

    def test_embedded_in_stdout(self):
        # The marker is surrounded by unrelated output.
        text = "captured output\n!######mod:test_fn:f:1:x:500######!\nmore"
        found = matches_re_end.search(text)
        assert found is not None
        expected = ("mod", "", "test_fn", "f", "1", "x:500")
        assert found.groups() == expected
100+
101+
102+
# --- Start/End pairing (simulates parse_test_xml matching logic) ---
103+
104+
105+
class TestStartEndPairing:
    """Check that a start marker and its end marker reduce to the same lookup key."""

    def test_paired_markers(self):
        stdout = (
            "!$######mod:Class.test_fn:func:1:iter1######$!\n"
            "test output here\n"
            "!######mod:Class.test_fn:func:1:iter1:54321######!"
        )
        start_matches = list(matches_re_start.finditer(stdout))

        end_by_key = {}
        for end_match in matches_re_end.finditer(stdout):
            fields = end_match.groups()
            # Drop an optional ":runtime" suffix from the last field; when
            # there is no colon, partition leaves the field unchanged.
            iteration_id = fields[5].partition(":")[0]
            end_by_key[fields[:5] + (iteration_id,)] = end_match

        assert len(start_matches) == 1
        assert len(end_by_key) == 1
        # The start groups must equal the end key: first five groups + iteration id.
        assert start_matches[0].groups() in end_by_key
130+
131+
# --- parse_test_failures_from_stdout tests ---
132+
133+
134+
class TestParseTestFailuresHeader:
    """Check which lines ``parse_test_failures_from_stdout`` accepts as the FAILURES header."""

    def test_standard_pytest_header(self):
        captured = (
            "..F.\n"
            "=================================== FAILURES ===================================\n"
            "_______ test_foo _______\n"
            "\n"
            " def test_foo():\n"
            "> assert False\n"
            "E AssertionError\n"
            "\n"
            "test.py:3: AssertionError\n"
            "=========================== short test summary info ============================\n"
            "FAILED test.py::test_foo\n"
        )
        failures = parse_test_failures_from_stdout(captured)
        assert "test_foo" in failures

    def test_minimal_equals(self):
        """A header with a single '=' on each side is still recognised."""
        captured = (
            "= FAILURES =\n"
            "_______ test_bar _______\n"
            "\n"
            " assert False\n"
            "\n"
            "test.py:1: AssertionError\n"
            "= short test summary info =\n"
        )
        failures = parse_test_failures_from_stdout(captured)
        assert "test_bar" in failures

    def test_no_failures_section(self):
        captured = "....\n4 passed in 0.1s\n"
        failures = parse_test_failures_from_stdout(captured)
        assert failures == {}

    def test_word_failures_without_equals_is_not_matched(self):
        """The bare word 'FAILURES', with no '=' around it, is not a header."""
        captured = (
            "FAILURES detected in module\n"
            "_______ test_baz _______\n"
            "\n"
            " assert False\n"
        )
        failures = parse_test_failures_from_stdout(captured)
        assert failures == {}

    def test_failures_in_test_output_not_matched(self):
        """Program output that mentions 'FAILURES' without '=' signs is ignored."""
        captured = (
            "Testing FAILURES handling\n"
            "All good\n"
        )
        failures = parse_test_failures_from_stdout(captured)
        assert failures == {}

0 commit comments

Comments
 (0)