1414
1515import json
1616from dataclasses import dataclass
17- from typing import TYPE_CHECKING
17+ from typing import TYPE_CHECKING , NamedTuple
1818
1919if TYPE_CHECKING :
2020 from pathlib import Path
2727_IMPROVEMENT_THRESHOLD = 0.05
2828
2929
30+ class _BenchmarkData (NamedTuple ):
31+ mean_ns : float
32+ output_hash : str | None
33+
34+
3035# ---------------------------------------------------------------------------
3136# Public data classes
3237# ---------------------------------------------------------------------------
@@ -54,6 +59,9 @@ def change_pct(self) -> str:
5459 sign = "+" if self .change_ratio >= 0 else ""
5560 return f"{ sign } { self .change_ratio * 100 :.1f} %"
5661
62+ output_changed : bool | None = None
63+ """None when one or both runs were collected without --codspeed-capture-output."""
64+
5765 @property
5866 def is_regression (self ) -> bool :
5967 return self .change_ratio > _REGRESSION_THRESHOLD
@@ -114,18 +122,21 @@ def find_baseline(results_dir: Path, current_path: Path) -> Path | None:
114122# ---------------------------------------------------------------------------
115123
116124
117- def _extract_benchmarks (data : dict [str , Any ]) -> dict [str , float ]:
118- """Return ``{uri: mean_ns }`` from a parsed results JSON.
125+ def _extract_benchmarks (data : dict [str , Any ]) -> dict [str , _BenchmarkData ]:
126+ """Return ``{uri: _BenchmarkData }`` from a parsed results JSON.
119127
120128 Benchmarks without a ``stats.mean_ns`` field (e.g. simulation-mode
121129 stubs) are silently ignored.
122130 """
123- result : dict [str , float ] = {}
131+ result : dict [str , _BenchmarkData ] = {}
124132 for bench in data .get ("benchmarks" , []):
125133 stats = bench .get ("stats" ) or {}
126134 mean_ns = stats .get ("mean_ns" )
127135 if mean_ns is not None :
128- result [bench ["uri" ]] = float (mean_ns )
136+ result [bench ["uri" ]] = _BenchmarkData (
137+ mean_ns = float (mean_ns ),
138+ output_hash = bench .get ("output_hash" ),
139+ )
129140 return result
130141
131142
@@ -148,14 +159,25 @@ def compare_results(baseline_path: Path, current_path: Path) -> ComparisonReport
148159 unchanged : list [BenchmarkDiff ] = []
149160 new_benchmarks : list [str ] = []
150161
151- for uri , current_mean in current .items ():
162+ for uri , current_data in current .items ():
152163 if uri not in baseline :
153164 new_benchmarks .append (uri )
154165 continue
166+ baseline_data = baseline [uri ]
167+ if (
168+ baseline_data .output_hash is not None
169+ and current_data .output_hash is not None
170+ ):
171+ output_changed : bool | None = (
172+ baseline_data .output_hash != current_data .output_hash
173+ )
174+ else :
175+ output_changed = None
155176 diff = BenchmarkDiff (
156177 name = uri ,
157- baseline_mean_ns = baseline [uri ],
158- current_mean_ns = current_mean ,
178+ baseline_mean_ns = baseline_data .mean_ns ,
179+ current_mean_ns = current_data .mean_ns ,
180+ output_changed = output_changed ,
159181 )
160182 if diff .is_regression :
161183 regressions .append (diff )
@@ -199,6 +221,35 @@ def _short_name(uri: str) -> str:
199221 return uri .split ("::" )[- 1 ] if "::" in uri else uri
200222
201223
def _fmt_diff(diff: BenchmarkDiff) -> str:
    """Render one benchmark diff as a single report row.

    The row holds the short benchmark name, the formatted baseline and
    current mean times, and the percentage change.  A trailing marker is
    appended only when ``diff.output_changed`` is exactly ``True``.
    """
    name = _short_name(diff.name)
    before = _format_ns(diff.baseline_mean_ns)
    after = _format_ns(diff.current_mean_ns)
    columns = [
        f" {name:<42} ",
        f" {before:>8} ",
        f" → {after:>8} ",
        f" {diff.change_pct} ",
    ]
    # None (hash unavailable) and False (same output) add no marker.
    if diff.output_changed is True:
        columns.append(" ! output changed")
    return "".join(columns)
234+
235+
236+ def _print_footer (report : ComparisonReport ) -> None :
237+ correctness_warnings = sum (
238+ 1
239+ for d in (* report .regressions , * report .improvements , * report .unchanged )
240+ if d .output_changed is True
241+ )
242+ footer = (
243+ f"\n { report .total_compared } compared"
244+ f" · { len (report .regressions )} regression(s)"
245+ f" · { len (report .improvements )} improvement(s)"
246+ )
247+ if correctness_warnings :
248+ footer += f" · { correctness_warnings } correctness warning(s)"
249+ print (footer )
250+ print ()
251+
252+
202253def print_comparison_report (report : ComparisonReport , baseline_path : Path ) -> None :
203254 """Print a human-readable comparison report to stdout.
204255
@@ -214,22 +265,12 @@ def print_comparison_report(report: ComparisonReport, baseline_path: Path) -> No
214265 if report .regressions :
215266 print (f"\n ✗ Regressions ({ len (report .regressions )} )" )
216267 for diff in report .regressions :
217- print (
218- f" { _short_name (diff .name ):<42} "
219- f" { _format_ns (diff .baseline_mean_ns ):>8} "
220- f" → { _format_ns (diff .current_mean_ns ):>8} "
221- f" { diff .change_pct } "
222- )
268+ print (_fmt_diff (diff ))
223269
224270 if report .improvements :
225271 print (f"\n ✓ Improvements ({ len (report .improvements )} )" )
226272 for diff in report .improvements :
227- print (
228- f" { _short_name (diff .name ):<42} "
229- f" { _format_ns (diff .baseline_mean_ns ):>8} "
230- f" → { _format_ns (diff .current_mean_ns ):>8} "
231- f" { diff .change_pct } "
232- )
273+ print (_fmt_diff (diff ))
233274
234275 if report .new_benchmarks :
235276 print (f"\n + New ({ len (report .new_benchmarks )} )" )
@@ -241,9 +282,4 @@ def print_comparison_report(report: ComparisonReport, baseline_path: Path) -> No
241282 for uri in report .removed_benchmarks :
242283 print (f" { _short_name (uri )} " )
243284
244- print (
245- f"\n { report .total_compared } compared"
246- f" · { len (report .regressions )} regression(s)"
247- f" · { len (report .improvements )} improvement(s)"
248- )
249- print ()
285+ _print_footer (report )
0 commit comments