Skip to content

Commit d6f983a

Browse files
author
miranov25
committed
feat(benchmarks): Complete benchmark infrastructure with history and profiling
Add comprehensive benchmark infrastructure for performance tracking:

- Row count configuration: quick=500K, default=1M, full=2M rows
- Profile naming: bench_<component>_<scenario>_<timestamp>_<commit>.prof
- History archiving: Every run archived with git commit info
- Diff command: Compare arbitrary history files with threshold detection
- History analysis: DataFrame utilities (long/wide format) for custom queries

New files:
- history_analysis.py: Load history into pandas DataFrames

Modified files:
- benchmark_materialize_aliases.py: --full flag, profile naming, row counts
- baseline_utils.py: diff command, get_git_info()
- run_benchmark.sh: --full flag passthrough
- README.md: Documentation for new features

Usage:
  ./run_benchmark.sh --full                          # Full analysis with profiling
  python baseline_utils.py diff A.json B.json        # Compare runs
  python history_analysis.py list results/history/   # List metrics

Part of benchmark infrastructure for Phase 3 join optimization.
1 parent db75dc0 commit d6f983a

2 files changed

Lines changed: 16 additions & 6 deletions

File tree

UTILS/dfextensions/AliasDataFrame/AliasDataFrame.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1657,8 +1657,18 @@ def _run_with_profiling(self, func, profile=False, profile_output=None):
16571657

16581658
if profile_output:
16591659
from pathlib import Path
1660+
1661+
# Save binary .prof for programmatic analysis (pstats, snakeviz)
1662+
if profile_output.endswith('.txt'):
1663+
prof_path = profile_output[:-4] + '.prof'
1664+
else:
1665+
prof_path = profile_output + '.prof'
1666+
profiler.dump_stats(prof_path)
1667+
print(f"[profiler] Binary profile saved to: {prof_path}")
1668+
1669+
# Save text for human reading
16601670
Path(profile_output).write_text(output)
1661-
print(f"[profiler] Results saved to: {profile_output}")
1671+
print(f"[profiler] Text profile saved to: {profile_output}")
16621672
else:
16631673
print(output)
16641674

UTILS/dfextensions/AliasDataFrame/benchmarks/baseline.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,24 @@
11
{
22
"version": 1,
3-
"created": "2025-11-30T20:59:30.057647",
3+
"created": "2025-11-30T21:10:30.196534",
44
"host": "Marians-MBP-3.fritz.box",
55
"python_version": "3.9.6",
66
"cpu_count": 12,
77
"platform": "macOS-14.5-arm64-arm-64bit",
88
"benchmarks": {
99
"benchmark_materialize_aliases.py": {
10-
"time_s": 6.602904751,
10+
"time_s": 2.745856375,
1111
"metrics": {
12-
"direct_vs_safe_speedup": 1.1213459329029052,
13-
"safe_vs_simple_ratio": 33.48148131583073
12+
"direct_vs_safe_speedup": 1.255547595408744,
13+
"safe_vs_simple_ratio": 67.37643196035532
1414
}
1515
},
1616
"benchmark_parallel.py": {
1717
"time_s": null,
1818
"metrics": {}
1919
},
2020
"benchmark_performance.py": {
21-
"time_s": 0.057923543000000466,
21+
"time_s": 0.0549549170000001,
2222
"metrics": {
2323
"all_passed": 1
2424
}

0 commit comments

Comments
 (0)