-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest_regression.py
More file actions
202 lines (161 loc) · 6.27 KB
/
Copy pathtest_regression.py
File metadata and controls
202 lines (161 loc) · 6.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
import numpy as np
import pytest
from tests.regression.cases import discover_cases
from tests.regression.helpers import run_codeentropy_with_config
# Regression cases are discovered once, when this module is imported, so that
# pytest_generate_tests can parametrize over them during collection.
CASES = discover_cases()
def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
    """
    Parametrize tests that request ``case`` with the discovered regression cases.

    Generating the parametrization at collection time turns every regression
    case into its own test item, which lets pytest-xdist spread the cases
    across multiple workers.

    Args:
        metafunc (pytest.Metafunc): Pytest metafunction object used to inspect
            the fixtures a test function requests and to attach
            parametrization to it.
    """
    # Tests that do not ask for ``case`` are left untouched.
    if "case" not in metafunc.fixturenames:
        return

    case_ids = [entry.id for entry in CASES]
    metafunc.parametrize("case", CASES, ids=case_ids)
def _group_index(payload: dict[str, Any]) -> dict[str, dict[str, Any]]:
    """
    Return the ``groups`` mapping of a regression output payload.

    Args:
        payload (dict[str, Any]): The full output payload produced by CodeEntropy.

    Returns:
        dict[str, dict[str, Any]]: Mapping of group IDs to group data. An empty
        dict is returned when the payload has no ``groups`` entry.

    Raises:
        TypeError: If the 'groups' field is present but is not a dictionary.
    """
    grouped = payload.get("groups", {})
    if isinstance(grouped, dict):
        return grouped
    raise TypeError("payload['groups'] must be a dict")
def _baseline_payload(payload: dict[str, Any]) -> dict[str, Any]:
    """
    Reduce a full output payload to the form persisted as a baseline.

    Only the grouped entropy results are kept. Runtime arguments and
    provenance are deliberately left out so that baselines stay stable
    across machines and runs.

    Args:
        payload (dict[str, Any]): The full output payload produced by CodeEntropy.

    Returns:
        dict[str, Any]: A dict with the single key ``"groups"``.

    Raises:
        TypeError: If the payload's 'groups' field is not a dictionary.
    """
    grouped_results = _group_index(payload)
    return dict(groups=grouped_results)
def _values_match(actual: Any, expected: Any, *, rtol: float, atol: float) -> bool:
    """
    Report whether two scalar values agree within the given tolerances.

    Values that cannot be converted to ``float`` (for example ``None`` or a
    nested structure) count as a mismatch instead of raising, so a single
    malformed entry does not abort the whole comparison.
    """
    try:
        np.testing.assert_allclose(
            float(actual), float(expected), rtol=rtol, atol=atol
        )
    except (AssertionError, TypeError, ValueError):
        return False
    return True


def _compare_grouped(
    *,
    got_payload: dict[str, Any],
    baseline_payload: dict[str, Any],
    rtol: float,
    atol: float,
) -> None:
    """
    Compare grouped regression outputs against baseline data.

    Every group, component and total present in the baseline must also be
    present in the output and agree within the numerical tolerances. Groups
    and component keys that exist only in the output are ignored.

    All problems found are collected and reported together in one assertion
    message, rather than stopping at the first one.

    Args:
        got_payload (dict[str, Any]): Output generated by the test run.
        baseline_payload (dict[str, Any]): Reference baseline output.
        rtol (float): Relative tolerance for numeric comparisons.
        atol (float): Absolute tolerance for numeric comparisons.

    Raises:
        AssertionError: If any mismatches are found between outputs and baseline.
        TypeError: If either payload's 'groups' field is not a dictionary.
    """
    got_groups = _group_index(got_payload)
    base_groups = _group_index(baseline_payload)

    missing_groups = sorted(set(base_groups) - set(got_groups))
    assert not missing_groups, f"Missing groups in output: {missing_groups}"

    mismatches: list[str] = []
    for gid, base_g in base_groups.items():
        got_g = got_groups[gid]
        if not isinstance(base_g, dict) or not isinstance(got_g, dict):
            mismatches.append(f"group {gid}: group entries must be dicts")
            continue

        base_components = base_g.get("components", {})
        got_components = got_g.get("components", {})
        if not isinstance(base_components, dict) or not isinstance(
            got_components, dict
        ):
            mismatches.append(f"group {gid}: components must be dicts")
        else:
            missing_keys = sorted(set(base_components) - set(got_components))
            if missing_keys:
                mismatches.append(
                    f"group {gid}: missing component keys: {missing_keys}"
                )
            # Keep checking the components that are present so the report is
            # complete even when some keys are missing.
            for k, expected in base_components.items():
                if k not in got_components:
                    continue
                actual = got_components[k]
                if not _values_match(actual, expected, rtol=rtol, atol=atol):
                    mismatches.append(
                        f"group {gid} component {k}: expected={expected} got={actual}"
                    )

        if "total" not in base_g:
            continue
        if "total" not in got_g:
            # A missing total must not be treated as 0.0: with a non-zero
            # atol that would silently pass for baselines close to zero.
            mismatches.append(
                f"group {gid} total: expected={base_g['total']} got=<missing>"
            )
        elif not _values_match(got_g["total"], base_g["total"], rtol=rtol, atol=atol):
            mismatches.append(
                f"group {gid} total: expected={base_g['total']} "
                f"got={got_g['total']}"
            )

    assert not mismatches, "Mismatches:\n" + "\n".join(" " + m for m in mismatches)
@pytest.mark.regression
def test_regression_matches_baseline(
    tmp_path: Path,
    case,
    request: pytest.FixtureRequest,
) -> None:
    """
    Execute a regression test for a single scenario and compare against baseline.

    This test:
    1. Loads the stored baseline JSON (skipped when --update-baselines is set)
    2. Runs CodeEntropy using the case's YAML configuration
    3. Either rewrites the baseline (--update-baselines) and skips, or
       compares the output payload against the stored baseline

    Args:
        tmp_path (Path): Temporary directory provided by pytest.
        case: A RegressionCase instance containing system, config, and baseline paths.
        request (pytest.FixtureRequest): Pytest request object used to access CLI
            options.

    Raises:
        AssertionError: If the output does not match the baseline or
            baseline is missing.
    """
    system = case.system
    config_path = case.config_path
    baseline_path = case.baseline_path
    update_mode = request.config.getoption("--update-baselines")

    # The existing baseline is only needed for comparison. Not reading it in
    # update mode allows a missing or corrupt baseline to be regenerated.
    baseline_payload: dict = {}
    if not update_mode:
        # Fail fast, before the run itself is started.
        if not baseline_path.exists():
            raise AssertionError(f"Missing baseline: {baseline_path}")
        baseline_payload = json.loads(baseline_path.read_text(encoding="utf-8"))

    run = run_codeentropy_with_config(
        workdir=tmp_path,
        config_src=config_path,
    )

    if request.config.getoption("--codeentropy-debug"):
        print("\n[DEBUG]")
        print("system:", system)
        print("config:", config_path)
        print("baseline:", baseline_path)

    if update_mode:
        baseline_path.parent.mkdir(parents=True, exist_ok=True)
        baseline_path.write_text(
            json.dumps(_baseline_payload(run.payload), indent=2) + "\n",
            encoding="utf-8",
        )
        # pytest.skip raises, so no comparison happens after an update.
        pytest.skip(f"Updated baseline for {system}")

    # NOTE(review): with rtol=1e-9 the allowed difference is dominated by
    # atol=0.5 for all but very large values; confirm this is intended.
    _compare_grouped(
        got_payload=run.payload,
        baseline_payload=baseline_payload,
        rtol=1e-9,
        atol=0.5,
    )