Skip to content

Commit 7fa0c26

Browse files
committed
save scale_factor
Signed-off-by: Andrew Duffy <andrew@a10y.dev>
1 parent 2912d41 commit 7fa0c26

2 files changed

Lines changed: 17 additions & 7 deletions

File tree

scripts/capture-file-sizes.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,11 @@ def main():
5858
if format_name not in formats_to_capture:
5959
continue
6060

61+
# Extract scale factor from path (e.g., "1.0" for tpch/1.0/vortex-file-compressed)
62+
# Default to "1.0" if no intermediate directory (e.g., clickbench)
63+
path_between = format_dir.relative_to(benchmark_dir).parent
64+
scale_factor = str(path_between) if str(path_between) != "." else "1.0"
65+
6166
# Capture all files in this format directory
6267
for file_path in format_dir.rglob("*"):
6368
if not file_path.is_file():
@@ -70,14 +75,15 @@ def main():
7075
{
7176
"commit_id": args.commit,
7277
"benchmark": args.benchmark,
78+
"scale_factor": scale_factor,
7379
"format": format_name,
7480
"file": str(relative_path),
7581
"size_bytes": size_bytes,
7682
}
7783
)
7884

7985
# Sort for deterministic output
80-
records.sort(key=lambda r: (r["benchmark"], r["format"], r["file"]))
86+
records.sort(key=lambda r: (r["benchmark"], r["scale_factor"], r["format"], r["file"]))
8187

8288
# Write JSONL output
8389
with open(args.output, "w") as f:

scripts/compare-file-sizes.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,9 @@ def main():
5151
with open(args.base_file) as f:
5252
for line in f:
5353
record = json.loads(line)
54-
key = (record["benchmark"], record["format"], record["file"])
54+
# Support old records without scale_factor (default to "1.0")
55+
scale_factor = record.get("scale_factor", "1.0")
56+
key = (record["benchmark"], scale_factor, record["format"], record["file"])
5557
base_data[key] = record["size_bytes"]
5658
except FileNotFoundError:
5759
print("_Base file sizes not found._")
@@ -62,7 +64,8 @@ def main():
6264
with open(args.head_file) as f:
6365
for line in f:
6466
record = json.loads(line)
65-
key = (record["benchmark"], record["format"], record["file"])
67+
scale_factor = record.get("scale_factor", "1.0")
68+
key = (record["benchmark"], scale_factor, record["format"], record["file"])
6669
head_data[key] = record["size_bytes"]
6770
except FileNotFoundError:
6871
print("_HEAD file sizes not found._")
@@ -74,7 +77,7 @@ def main():
7477

7578
all_keys = set(base_data.keys()) | set(head_data.keys())
7679
for key in all_keys:
77-
benchmark, fmt, file_name = key
80+
benchmark, scale_factor, fmt, file_name = key
7881
base_size = base_data.get(key, 0)
7982
head_size = head_data.get(key, 0)
8083

@@ -95,6 +98,7 @@ def main():
9598
comparisons.append(
9699
{
97100
"file": file_name,
101+
"scale_factor": scale_factor,
98102
"format": fmt,
99103
"base_size": base_size,
100104
"head_size": head_size,
@@ -111,14 +115,14 @@ def main():
111115
comparisons.sort(key=lambda x: x["pct_change"], reverse=True)
112116

113117
# Output markdown table
114-
print("| File | Format | Base | HEAD | Change | % |")
115-
print("|------|--------|------|------|--------|---|")
118+
print("| File | Scale | Format | Base | HEAD | Change | % |")
119+
print("|------|-------|--------|------|------|--------|---|")
116120

117121
for comp in comparisons:
118122
pct_str = format_pct_change(comp["pct_change"]) if comp["pct_change"] != float("inf") else "new"
119123
base_str = format_size(comp["base_size"]) if comp["base_size"] > 0 else "-"
120124
print(
121-
f"| {comp['file']} | {comp['format']} | {base_str} | "
125+
f"| {comp['file']} | {comp['scale_factor']} | {comp['format']} | {base_str} | "
122126
f"{format_size(comp['head_size'])} | {format_change(comp['change'])} | {pct_str} |"
123127
)
124128

0 commit comments

Comments (0)