|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Download and extract the Pyodide release matching PYODIDE_VERSION |
| 3 | +defined in src/lib/config/pyodide.ts. |
| 4 | +
|
| 5 | +The full Pyodide tarball ships every package in the distribution (>1 GB), |
| 6 | +which exceeds GitHub Pages' per-file limit. We only keep the runtime |
| 7 | +core plus the packages our notebooks actually load (numpy, scipy, |
| 8 | +micropip, matplotlib + transitive deps as listed in pyodide-lock.json). |
| 9 | +
|
| 10 | +Idempotent: skips download when static/pyodide/.version already records |
| 11 | +the matching version. |
| 12 | +""" |
| 13 | + |
| 14 | +from __future__ import annotations |
| 15 | + |
| 16 | +import json |
| 17 | +import re |
| 18 | +import shutil |
| 19 | +import subprocess |
| 20 | +import sys |
| 21 | +import tarfile |
| 22 | +import tempfile |
| 23 | +from pathlib import Path |
| 24 | + |
# All paths are anchored two directory levels above this script.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# TypeScript module that declares PYODIDE_VERSION.
CONFIG_FILE = PROJECT_ROOT.joinpath("src", "lib", "config", "pyodide.ts")
# Install location of the extracted distribution, and the file recording
# which version is installed there.
TARGET_DIR = PROJECT_ROOT.joinpath("static", "pyodide")
VERSION_MARKER = TARGET_DIR.joinpath(".version")

# Packages we explicitly load — runtime deps are resolved via pyodide-lock.json
ROOT_PACKAGES = ("numpy", "scipy", "micropip", "matplotlib")
LOCKFILE_NAME = "pyodide-lock.json"
| 33 | + |
| 34 | + |
def read_version() -> str:
    """Return the Pyodide version string declared in CONFIG_FILE.

    The file is searched for the first ``PYODIDE_VERSION = '<value>'``
    assignment (single or double quotes) and ``<value>`` is returned.

    Raises:
        RuntimeError: if no such assignment is present in CONFIG_FILE.
    """
    # Decode explicitly as UTF-8; without an encoding argument read_text()
    # falls back to the locale encoding, which differs between platforms.
    source = CONFIG_FILE.read_text(encoding="utf-8")
    found = re.search(r"PYODIDE_VERSION\s*=\s*['\"]([^'\"]+)['\"]", source)
    if found is None:
        raise RuntimeError(f"PYODIDE_VERSION not found in {CONFIG_FILE}")
    return found.group(1)
| 41 | + |
| 42 | + |
def already_installed(version: str) -> bool:
    """Report whether VERSION_MARKER exists and records *version*.

    The marker's contents are compared with surrounding whitespace stripped.
    """
    marker_present = VERSION_MARKER.exists()
    # Short-circuit: the marker is only read when it exists.
    return marker_present and VERSION_MARKER.read_text().strip() == version
| 47 | + |
| 48 | + |
def resolve_packages(tar: tarfile.TarFile) -> tuple[set[str], set[str]]:
    """Read pyodide-lock.json and return (required_files, all_package_files).

    required_files: filenames needed for ROOT_PACKAGES + transitive depends.
    all_package_files: filenames of every package the lockfile knows about
    (used to drop non-required ones during extraction).

    A package name that is reached but absent from the lockfile only
    produces a printed warning; it contributes no file to required_files.

    Raises:
        KeyError: if the archive has no ``pyodide/<LOCKFILE_NAME>`` member
            (raised by ``TarFile.getmember``) or the lockfile has no
            "packages" key.
        RuntimeError: if the lockfile member cannot be opened for reading.
    """
    member = tar.getmember(f"pyodide/{LOCKFILE_NAME}")
    handle = tar.extractfile(member)
    if handle is None:
        raise RuntimeError(f"Could not extract {LOCKFILE_NAME}")
    # Close the member reader as soon as the JSON has been parsed.
    with handle:
        lock = json.load(handle)
    packages = lock["packages"]

    # Walk the "depends" graph starting from the root packages; `visited`
    # guards against revisiting shared or cyclic dependencies.
    visited: set[str] = set()
    pending = list(ROOT_PACKAGES)
    while pending:
        name = pending.pop()
        if name in visited:
            continue
        visited.add(name)
        if name not in packages:
            print(f" Warning: dependency '{name}' not found in lockfile")
            continue
        pending.extend(packages[name].get("depends", []))

    required = {packages[name]["file_name"] for name in visited if name in packages}
    all_files = {pkg["file_name"] for pkg in packages.values()}
    print(f" Resolved {len(visited)} packages → {len(required)} files kept, "
          f"{len(all_files) - len(required)} dropped")
    return required, all_files
| 80 | + |
| 81 | + |
| 82 | +def make_filter(required_files: set[str], all_package_files: set[str]): |
| 83 | + def keep(member: tarfile.TarInfo, path: str): |
| 84 | + base = Path(member.name).name |
| 85 | + if base.endswith("-tests.tar"): |
| 86 | + return None |
| 87 | + # Drop non-required package artifacts; .whl.metadata follows its .whl. |
| 88 | + owner = base.removesuffix(".metadata") if base.endswith(".metadata") else base |
| 89 | + if owner in all_package_files and owner not in required_files: |
| 90 | + return None |
| 91 | + return tarfile.data_filter(member, path) |
| 92 | + |
| 93 | + return keep |
| 94 | + |
| 95 | + |
def download_and_extract(version: str) -> None:
    """Download the Pyodide release for *version* and install a pruned copy.

    The release tarball is fetched with ``curl`` into a temporary file,
    the lockfile inside it is used to decide which package files to keep,
    and the filtered ``pyodide/`` tree is extracted to a temporary directory
    before being moved to TARGET_DIR. VERSION_MARKER is written last.

    Any existing TARGET_DIR is removed only after the new tree has been
    extracted and validated, so a failed download or extraction leaves the
    previous install in place. The temporary tarball is deleted on every
    exit path, including a failed download.

    Raises:
        subprocess.CalledProcessError: if ``curl`` exits with a non-zero status.
        RuntimeError: if the tarball has no top-level ``pyodide/`` directory
            (or if resolve_packages cannot read the lockfile).
    """
    url = (
        f"https://github.com/pyodide/pyodide/releases/download/"
        f"{version}/pyodide-{version}.tar.bz2"
    )
    print(f"Downloading {url} ...")

    # Only a unique path is needed here; curl writes the content itself, so
    # the handle is closed immediately and the file kept (delete=False).
    with tempfile.NamedTemporaryFile(suffix=".tar.bz2", delete=False) as tmp:
        tarball = Path(tmp.name)

    try:
        # Inside the try so a failed or partial download is cleaned up too.
        subprocess.run(["curl", "-fL", "-o", str(tarball), url], check=True)

        with tarfile.open(tarball, "r:bz2") as tar:
            print("Resolving package dependencies from lockfile ...")
            required, all_pkg_files = resolve_packages(tar)

            print(f"Extracting to {TARGET_DIR} ...")
            with tempfile.TemporaryDirectory() as extract_dir:
                tar.extractall(extract_dir, filter=make_filter(required, all_pkg_files))
                inner = Path(extract_dir) / "pyodide"
                if not inner.is_dir():
                    raise RuntimeError(
                        f"Expected 'pyodide/' subdirectory in tarball, got: "
                        f"{[p.name for p in Path(extract_dir).iterdir()]}"
                    )
                # Swap out the old install only now that a replacement is
                # ready; shutil.move would nest `inner` inside an existing
                # TARGET_DIR, so it has to be removed first.
                if TARGET_DIR.exists():
                    shutil.rmtree(TARGET_DIR)
                TARGET_DIR.parent.mkdir(parents=True, exist_ok=True)
                shutil.move(str(inner), str(TARGET_DIR))
    finally:
        tarball.unlink(missing_ok=True)

    VERSION_MARKER.write_text(version + "\n")
    size_mb = sum(f.stat().st_size for f in TARGET_DIR.rglob("*") if f.is_file()) / 1024 / 1024
    print(f"Pyodide {version} installed at {TARGET_DIR} ({size_mb:.1f} MiB)")
| 132 | + |
| 133 | + |
def main() -> int:
    """Install the configured Pyodide version unless it is already present.

    Returns 0 in both cases, for use as the process exit status.
    """
    version = read_version()
    if not already_installed(version):
        download_and_extract(version)
    else:
        print(f"Pyodide {version} already present at {TARGET_DIR}, skipping")
    return 0
| 141 | + |
| 142 | + |
# Script entry point: exit with the status returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
0 commit comments