Skip to content

Commit ea59015

Browse files
hugovk and radarhere authored
Compare dist sizes vs latest PyPI release (#9621)
Co-authored-by: Andrew Murray <radarhere@users.noreply.github.com>
1 parent 24696af commit ea59015

2 files changed

Lines changed: 294 additions & 0 deletions

File tree

.github/compare-dist-sizes.py

Lines changed: 271 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,271 @@
1+
"""Compare sizes of newly-built dists against the latest release on PyPI.
2+
3+
Fetches file sizes for the latest Pillow release from the PyPI JSON API
4+
(no download required) and compares them to a directory of freshly-built
5+
wheels and sdist. Outputs a table to stdout (and to
6+
`$GITHUB_STEP_SUMMARY` if set).
7+
8+
Usage:
9+
`uv run .github/compare-dist-sizes.py <dist-dir>`
10+
"""
11+
12+
# /// script
13+
# requires-python = ">=3.10"
14+
# dependencies = [
15+
# "humanize",
16+
# "prettytable",
17+
# "termcolor",
18+
# ]
19+
# ///
20+
21+
from __future__ import annotations
22+
23+
import argparse
24+
import json
25+
import os
26+
import re
27+
import sys
28+
import urllib.request
29+
from pathlib import Path
30+
31+
import humanize
32+
from prettytable import PrettyTable, TableStyle
33+
from termcolor import colored
34+
35+
# PyPI JSON API endpoint queried for the baseline (latest released) file sizes.
PYPI_JSON_URL = "https://pypi.org/pypi/pillow/json"

# Wheel filename: {distribution}-{version}(-{build})?-{python}-{abi}-{platform}.whl
# sdist filename: {distribution}-{version}.tar.gz
#
# WHEEL_RE skips the distribution and version fields and captures the optional
# build tag (which must start with a digit) plus the python, abi and platform
# tags. Matching is case-insensitive.
WHEEL_RE = re.compile(
    r"^[^-]+-[^-]+(?:-(?P<build>\d[^-]*))?"
    r"-(?P<python>[^-]+)-(?P<abi>[^-]+)-(?P<platform>[^-]+)\.whl$",
    re.IGNORECASE,
)
# SDIST_RE captures the distribution name (everything before the first hyphen)
# and treats the rest, up to ".tar.gz", as the version. Matching is
# case-insensitive.
SDIST_RE = re.compile(
    r"^(?P<dist>[^-]+)-(?P<version>.+)\.tar\.gz$",
    re.IGNORECASE,
)
48+
49+
50+
def key_for(filename: str) -> str:
    """Map a dist filename to an identifier that ignores the version.

    Wheels become ``wheel:`` followed by the hyphen-joined build (if any),
    python, abi and platform tags; an sdist becomes ``sdist``.

    Raises:
        ValueError: if *filename* is neither a wheel nor a ``.tar.gz`` sdist.
    """
    wheel = WHEEL_RE.match(filename)
    if wheel is not None:
        tags = [wheel["python"], wheel["abi"], wheel["platform"]]
        if wheel["build"]:
            # The optional build tag sorts in front of the compatibility tags.
            tags.insert(0, wheel["build"])
        return "wheel:" + "-".join(tags)

    if SDIST_RE.match(filename) is None:
        msg = f"Unexpected dist name: {filename}"
        raise ValueError(msg)
    return "sdist"
59+
60+
61+
def display_for(filename: str) -> str:
    """Return a compact label for *filename* suitable for a table cell.

    Wheels lose their ``{distribution}-{version}-`` prefix, sdists collapse
    to a fixed label, and anything else is returned unchanged.
    """
    wheel = WHEEL_RE.match(filename)
    if wheel is not None:
        tags = [wheel["python"], wheel["abi"], wheel["platform"]]
        if wheel["build"]:
            tags.insert(0, wheel["build"])
        return "-".join(tags) + ".whl"

    return "sdist (.tar.gz)" if SDIST_RE.match(filename) else filename
69+
70+
71+
def fetch_pypi_sizes(
    timeout: float = 30.0,
) -> tuple[str, dict[str, tuple[str, int]]]:
    """Return (version, {key: (filename, size)}) for the latest PyPI release.

    Args:
        timeout: Seconds to wait on the PyPI request before giving up, so a
            stalled connection cannot hang the calling job indefinitely.

    Returns:
        The version string reported by PyPI and a mapping from the
        version-independent key of each released file (see `key_for`) to its
        filename and size in bytes.

    Raises:
        ValueError: if a released file is neither a wheel nor a ``.tar.gz``
            sdist (propagated from `key_for`).
        OSError: if the request fails or times out (raised by ``urlopen``).
    """
    with urllib.request.urlopen(PYPI_JSON_URL, timeout=timeout) as response:
        data = json.load(response)

    version = data["info"]["version"]
    sizes: dict[str, tuple[str, int]] = {}
    # "urls" lists the files of the release described by "info".
    for entry in data.get("urls", []):
        filename = entry["filename"]
        sizes[key_for(filename)] = (filename, entry["size"])
    return version, sizes
82+
83+
84+
def collect_local_sizes(dist_dir: Path) -> dict[str, tuple[str, int]]:
    """Return {key: (filename, size)} for every regular file in *dist_dir*.

    Entries are visited in sorted path order and sub-directories are skipped.
    A file whose name is not a recognised dist makes `key_for` raise
    ``ValueError``.
    """
    return {
        key_for(entry.name): (entry.name, entry.stat().st_size)
        for entry in sorted(dist_dir.iterdir())
        if entry.is_file()
    }
92+
93+
94+
def human(n: int | None) -> str:
    """Format a byte count for display, or return "n/a" when it is unknown."""
    return "n/a" if n is None else humanize.naturalsize(n)
98+
99+
100+
def pct_change(before: int | None, after: int | None) -> str:
    """Return the signed percentage change from *before* to *after*.

    Args:
        before: Baseline size in bytes, or None when there is no baseline.
        after: Current size in bytes, or None when the file no longer exists.

    Returns:
        A string such as ``"+1.25%"`` or ``"-3.00%"``, or ``"n/a"`` when the
        change cannot be expressed (a missing side, or growth from a zero
        baseline).
    """
    if before is None or after is None:
        return "n/a"
    if before == 0:
        # A ratio against a zero baseline is undefined. Reporting "+0.00%"
        # would make any growth from nothing look like "no change", so only
        # the 0 -> 0 case is reported as an exact zero change.
        return "+0.00%" if after == 0 else "n/a"
    delta = (after - before) / before * 100
    return f"{delta:+.2f}%"
105+
106+
107+
def pct_severity(text: str) -> dict[str, str] | None:
    """Classify a formatted percentage string into a colour and an emoji.

    ``"n/a"`` has no severity and yields None. Otherwise an increase of at
    least 5% is red, any smaller increase is yellow, and no change or a
    decrease is green.
    """
    if text == "n/a":
        return None

    change = float(text.rstrip("%"))
    if change >= 5:
        color, emoji = "red", "🔴"
    elif change > 0:
        color, emoji = "yellow", "🟡"
    else:
        color, emoji = "green", "🟢"
    return {"color": color, "emoji": emoji}
118+
119+
120+
def render_table(
    baseline_label: str,
    baseline_sizes: dict[str, tuple[str, int]],
    local_sizes: dict[str, tuple[str, int]],
    *,
    markdown: bool,
) -> str:
    """Render the size comparison as a titled table.

    Args:
        baseline_label: Human-readable name of the baseline, used in the title.
        baseline_sizes: ``{key: (filename, size)}`` for the baseline release.
        local_sizes: ``{key: (filename, size)}`` for the freshly built files.
        markdown: Emit a Markdown table with emoji markers when true;
            otherwise a bordered table coloured via ``termcolor``.

    Returns:
        The title, a blank line, the table, and a trailing newline.

    One row is emitted per key found in either mapping (sdist first, then
    wheels sorted by key). Files missing locally are shown as "orphan" rows
    and excluded from the summary rows. Files present locally always count
    towards the "now" totals, and towards the "before" totals only when a
    baseline entry exists.
    """
    table = PrettyTable()
    table.set_style(TableStyle.MARKDOWN if markdown else TableStyle.SINGLE_BORDER)
    table.field_names = ["File", "Size before", "Size now", "Change"]
    table.align = "r"
    table.align["File"] = "l"

    def style(cells: list[str], role: str) -> list[str]:
        """Decorate one row according to its role and its change severity."""
        # Severity must be read before the change cell gets decorated.
        severity = pct_severity(cells[3])
        if markdown:
            if severity:
                cells[3] = f"{severity['emoji']} {cells[3]}"
            if role == "orphan":
                return [f"*{c}*" for c in cells]
            if role == "summary":
                return [f"**{c}**" for c in cells]
            return cells

        if role == "orphan":
            return [colored(c, "dark_grey") for c in cells]

        bold_attrs = ["bold"] if role == "summary" else []
        if bold_attrs:
            cells[:3] = [colored(c, attrs=bold_attrs) for c in cells[:3]]
        if severity:
            cells[3] = colored(cells[3], severity["color"], attrs=bold_attrs)
        elif bold_attrs:
            cells[3] = colored(cells[3], attrs=bold_attrs)
        return cells

    def total(sizes: list[int]) -> int | None:
        """Sum *sizes*, or None when there is no baseline data to sum."""
        return sum(sizes) if sizes else None

    def average(sizes: list[int]) -> int | None:
        """Integer mean of *sizes*, or None when the list is empty."""
        return sum(sizes) // len(sizes) if sizes else None

    keys = list(set(baseline_sizes) | set(local_sizes))
    # Put sdist first for readability
    keys.sort(key=lambda k: (k != "sdist", k))

    wheel_before = []
    wheel_after = []
    total_before = []
    total_after = []
    for key in keys:
        baseline_entry = baseline_sizes.get(key)
        local_entry = local_sizes.get(key)
        # Prefer the local filename for display; fall back to the baseline's.
        display_name = display_for((local_entry or baseline_entry)[0])
        before = baseline_entry[1] if baseline_entry else None
        after = local_entry[1] if local_entry else None
        if after is None:
            # Removed since baseline: ignore in totals
            role = "orphan"
        else:
            # Present locally (in both, or newly added): count in totals
            total_after.append(after)
            if before is not None:
                total_before.append(before)
            if key != "sdist":
                wheel_after.append(after)
                if before is not None:
                    wheel_before.append(before)
            role = "data"
        cells = [
            display_name,
            human(before),
            human(after),
            pct_change(before, after),
        ]
        table.add_row(style(cells, role))

    if not markdown:
        table.add_divider()

    if wheel_after:
        avg_before = average(wheel_before)
        avg_after = average(wheel_after)
        table.add_row(
            style(
                [
                    f"wheel average ({len(wheel_after)} wheels)",
                    human(avg_before),
                    human(avg_after),
                    pct_change(avg_before, avg_after),
                ],
                "summary",
            )
        )
        # With no baseline wheels, report "n/a" (as the average row does)
        # rather than a misleading "0 Bytes" / "+0.00%".
        wheel_sum_before = total(wheel_before)
        wheel_sum_after = sum(wheel_after)
        table.add_row(
            style(
                [
                    f"wheel total ({len(wheel_after)} wheels)",
                    human(wheel_sum_before),
                    human(wheel_sum_after),
                    pct_change(wheel_sum_before, wheel_sum_after),
                ],
                "summary",
            ),
            divider=not markdown,
        )

    if total_after:
        all_sum_before = total(total_before)
        all_sum_after = sum(total_after)
        table.add_row(
            style(
                [
                    f"artifacts total ({len(total_after)} artifacts)",
                    human(all_sum_before),
                    human(all_sum_after),
                    pct_change(all_sum_before, all_sum_after),
                ],
                "summary",
            )
        )

    title = f"## Dist size comparison vs {baseline_label}"
    if not markdown:
        title = colored(title, attrs=["bold"])
    return f"{title}\n\n{table.get_string()}\n"
237+
238+
239+
def main() -> int:
    """Compare a directory of built dists against the latest PyPI release.

    Prints the comparison table to stdout and, when ``GITHUB_STEP_SUMMARY``
    is set, appends a Markdown version to that file.

    Returns:
        0 on success, or 1 when the given path is not a directory.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "dist_dir",
        type=Path,
        help="Directory containing newly-built wheels and sdist",
    )
    dist_dir = parser.parse_args().dist_dir

    if not dist_dir.is_dir():
        print(f"error: {dist_dir} is not a directory", file=sys.stderr)
        return 1

    baseline_version, baseline_sizes = fetch_pypi_sizes()
    baseline_label = f"Pillow {baseline_version} on PyPI"
    local_sizes = collect_local_sizes(dist_dir)

    def render(*, markdown: bool) -> str:
        """Render the same comparison in the requested output flavour."""
        return render_table(
            baseline_label, baseline_sizes, local_sizes, markdown=markdown
        )

    print(render(markdown=False))

    summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
    if summary_path:
        # Append, so earlier content in the summary file is preserved.
        with open(summary_path, "a", encoding="utf-8") as f:
            f.write(render(markdown=True))

    return 0
268+
269+
270+
# Run only when executed as a script; main()'s return value is the exit status.
if __name__ == "__main__":
    raise SystemExit(main())

.github/workflows/wheels.yml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ on:
1313
paths: &paths
1414
- ".ci/requirements-cibw.txt"
1515
- ".ci/requirements-sbom.txt"
16+
- ".github/compare-dist-sizes.py"
1617
- ".github/dependencies.json"
1718
- ".github/generate-sbom.py"
1819
- ".github/workflows/wheels*"
@@ -255,6 +256,28 @@ jobs:
255256
echo $files
256257
[ "$files" -eq $EXPECTED_DISTS ] || exit 1
257258
259+
# Report how the freshly built dists compare in size with the latest PyPI release.
compare-dist-sizes:
  # Runs only after the jobs listed here have completed.
  needs: [build-native-wheels, windows, sdist]
  runs-on: ubuntu-latest
  name: Compare dist sizes vs PyPI
  steps:
    # Check out the repository so the comparison script is available.
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        persist-credentials: false

    # Install uv, which runs the script in the final step.
    - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8.1.0
      with:
        enable-cache: false

    # Gather every artifact matching dist-* into a single dist/ directory.
    - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
      with:
        pattern: dist-*
        path: dist
        merge-multiple: true

    # The script prints a table and also appends it to $GITHUB_STEP_SUMMARY if set.
    - name: Compare dist sizes vs latest PyPI release
      run: uv run .github/compare-dist-sizes.py dist
280+
258281
scientific-python-nightly-wheels-publish:
259282
if: github.event.repository.fork == false && (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch')
260283
needs: count-dists

0 commit comments

Comments
 (0)