Skip to content

Commit 91bc940

Browse files
authored
more succinct benchmark summaries (#6959)
<!--
Thank you for submitting a pull request! We appreciate your time and effort.
Please make sure to provide enough information so that we can review your pull request.
The Summary and Testing sections below contain guidance on what to include.
-->

## Summary

<!--
If this PR is related to a tracked effort, please link to the relevant issue here (e.g., `Closes: #123`). Otherwise, feel free to ignore / delete this.

In this section, please:
1. Explain the rationale for this change.
2. Summarize the changes included in this PR.

A general rule of thumb is that larger PRs should have larger summaries. If there are a lot of changes, please help us review the code by explaining what was changed and why.

If there is an issue or discussion attached, there is no need to duplicate all the details, but clarity is always preferred over brevity.
-->

Closes: #000

<!--
## API Changes

Uncomment this section if there are any user-facing changes.

Consider whether the change affects users in one of the following ways:
1. Breaks public APIs in some way.
2. Changes the underlying behavior of one of the engine integrations.
3. Should some documentation be updated to reflect this change?

If a public API is changed in a breaking manner, make sure to add the appropriate label. You can run `./scripts/public-api.sh` locally to see if there are any public API changes (and this also runs in our CI).
-->

## Testing

<!--
Please describe how this change was tested. Here are some common categories for testing in Vortex:
1. Verifying existing behavior is maintained.
2. Verifying new behavior and functionality works correctly.
3. Serialization compatibility (backwards and forwards) should be maintained or explicitly broken.
-->

---------

Signed-off-by: Will Manning <will@willmanning.io>
1 parent 74a31af commit 91bc940

1 file changed

Lines changed: 22 additions & 65 deletions

File tree

scripts/compare-benchmark-jsons.py

Lines changed: 22 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -143,19 +143,6 @@ def ratio_stats(
143143
}
144144

145145

146-
def robust_scale(values: pd.Series | np.ndarray) -> float:
147-
"""Estimate spread with MAD so outliers do not dominate the noise estimate."""
148-
149-
array = np.asarray(values, dtype=float)
150-
array = array[np.isfinite(array)]
151-
if array.size == 0:
152-
return float("nan")
153-
154-
median = np.median(array)
155-
mad = np.median(np.abs(array - median))
156-
return float(1.4826 * mad)
157-
158-
159146
def median_polish(table: pd.DataFrame, max_iterations: int = 10, tolerance: float = 1e-8) -> MedianPolishResult | None:
160147
"""Estimate row and column effects for the log-ratio matrix."""
161148

@@ -309,20 +296,16 @@ def build_statistical_analysis(df: pd.DataFrame, threshold_pct: int) -> dict[str
309296
axis=1,
310297
)
311298

312-
# Median polish gives a robust overall shift plus residual-noise estimate.
299+
# Median polish gives a robust overall shift estimate.
313300
log_ratio_table = detail_df.pivot(index="query", columns="combo", values="log_ratio")
314301
polish = median_polish(log_ratio_table)
315-
residual_noise_log_scale = robust_scale(polish.residuals.to_numpy().ravel()) if polish is not None else float("nan")
316302

317303
return {
318304
"detail_df": detail_df,
319305
"query_stats": query_stats,
320306
"systemic_shift_ratio": float(np.exp(systemic_shift_log_ratio)),
321307
"systemic_shift_std": systemic_shift_std,
322308
"median_polish": polish,
323-
"residual_noise_ratio": float(np.exp(residual_noise_log_scale))
324-
if np.isfinite(residual_noise_log_scale)
325-
else float("nan"),
326309
}
327310

328311

@@ -527,72 +510,46 @@ def main() -> None:
527510
vortex_df = df3[df3["name"].str.contains("vortex", case=False, na=False)]
528511
parquet_df = df3[df3["name"].str.contains("parquet", case=False, na=False)]
529512

530-
geo_mean_ratio = calculate_geo_mean(df3)
531513
vortex_geo_mean_ratio = calculate_geo_mean(vortex_df)
532514
parquet_geo_mean_ratio = calculate_geo_mean(parquet_df)
533-
overall_performance = (
534-
"no data"
535-
if pd.isna(geo_mean_ratio)
536-
else format_performance(geo_mean_ratio, improvement_threshold, regression_threshold, "overall")
537-
)
538515

539-
summary_lines = [
540-
"## Summary",
541-
"",
542-
f"- **Overall**: {overall_performance}",
543-
]
516+
statistical_analysis = build_statistical_analysis(df3, threshold_pct)
517+
verdict = build_verdict(statistical_analysis) if statistical_analysis is not None else None
518+
519+
summary_fields: list[str] = []
520+
521+
if verdict is not None:
522+
summary_fields.append(f"**Verdict**: {verdict['status']} ({verdict['confidence']} confidence)")
523+
summary_fields.append(f"**Attributed Vortex impact**: {verdict['impact']}")
524+
544525
if len(vortex_df) > 0:
545526
vortex_performance = format_performance(
546527
vortex_geo_mean_ratio,
547528
improvement_threshold,
548529
regression_threshold,
549530
"vortex",
550531
)
551-
summary_lines.append(f"- **Vortex**: {vortex_performance}")
532+
summary_fields.append(f"**Vortex (geomean)**: {vortex_performance}")
552533
if len(parquet_df) > 0:
553534
parquet_performance = format_performance(
554535
parquet_geo_mean_ratio,
555536
improvement_threshold,
556537
regression_threshold,
557538
"parquet",
558539
)
559-
summary_lines.append(f"- **Parquet**: {parquet_performance}")
540+
summary_fields.append(f"**Parquet (geomean)**: {parquet_performance}")
560541

561-
statistical_analysis = build_statistical_analysis(df3, threshold_pct)
562-
verdict = build_verdict(statistical_analysis) if statistical_analysis is not None else None
563542
if verdict is not None:
564-
summary_lines.extend(
565-
[
566-
"",
567-
"## Verdict",
568-
"",
569-
f"**{verdict['status']}**",
570-
f"- **Attributed Vortex impact**: {verdict['impact']}",
571-
f"- **Confidence**: {verdict['confidence']}",
572-
f"- **Environment shift**: {verdict['environment_shift']}",
573-
]
574-
)
575-
576-
if statistical_analysis is not None:
577-
systemic_shift = format_ratio_change(statistical_analysis["systemic_shift_ratio"])
578-
control_sigma = format_ratio_change(float(np.exp(statistical_analysis["systemic_shift_std"])))
579-
residual_noise = format_ratio_change(statistical_analysis["residual_noise_ratio"])
580-
summary_lines.extend(
581-
[
582-
"",
583-
"## Statistical Summary",
584-
"",
585-
f"- **Systemic shift ({CONTROL_FORMAT} controls)**: {systemic_shift}",
586-
f"- **Control sigma**: {control_sigma}",
587-
f"- **Residual noise**: {residual_noise}",
588-
]
589-
)
590-
591-
polish = statistical_analysis["median_polish"]
592-
if polish is not None:
593-
summary_lines.append(f"- **Median polish overall**: {format_ratio_change(float(np.exp(polish.overall)))}")
594-
595-
print("\n".join(summary_lines))
543+
shifts = f"Parquet (control) {verdict['environment_shift']}"
544+
if statistical_analysis is not None:
545+
polish = statistical_analysis["median_polish"]
546+
if polish is not None:
547+
shifts += f" · Median polish {format_ratio_change(float(np.exp(polish.overall)))}"
548+
summary_fields.append(f"**Shifts**: {shifts}")
549+
550+
print("<br>".join(summary_fields))
551+
print("")
552+
print("---")
596553
print("")
597554

598555
if statistical_analysis is not None:

0 commit comments

Comments
 (0)