Skip to content

Commit 665d78a

Browse files
authored
Merge pull request #33 from ArchiveBox/claude/magical-pascal-iR9iO
Honor PUPPETEER_CACHE_DIR + PLAYWRIGHT_BROWSERS_PATH env vars
2 parents d9d3cb6 + 4aa46ab commit 665d78a

13 files changed

Lines changed: 485 additions & 163 deletions

.github/workflows/deploy-pages.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ concurrency:
1616

1717
jobs:
1818
build:
19-
runs-on: ubuntu-latest
19+
runs-on:
20+
group: Default
2021
timeout-minutes: 20
2122
steps:
2223
- name: Checkout
@@ -48,7 +49,8 @@ jobs:
4849
environment:
4950
name: github-pages
5051
url: ${{ steps.deployment.outputs.page_url }}
51-
runs-on: ubuntu-latest
52+
runs-on:
53+
group: Default
5254
needs: build
5355
timeout-minutes: 20
5456
steps:

.github/workflows/release.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ concurrency:
1919

2020
jobs:
2121
release-state:
22-
runs-on: ubuntu-latest
22+
runs-on:
23+
group: Default
2324
timeout-minutes: 20
2425
steps:
2526
- uses: actions/checkout@v6

.github/workflows/tests.yml

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ concurrency:
1919

2020
jobs:
2121
precheck:
22-
runs-on: ubuntu-latest
22+
runs-on:
23+
group: Default
2324
timeout-minutes: 20
2425

2526
steps:
@@ -30,6 +31,11 @@ jobs:
3031
with:
3132
python-version: '3.12'
3233

34+
- name: Setup Node
35+
uses: actions/setup-node@v6
36+
with:
37+
node-version: '22'
38+
3339
- name: Install uv
3440
uses: astral-sh/setup-uv@v8.0.0
3541
with:
@@ -49,7 +55,8 @@ jobs:
4955

5056
discover-standard-tests:
5157
needs: precheck
52-
runs-on: ubuntu-latest
58+
runs-on:
59+
group: Default
5360
timeout-minutes: 20
5461
outputs:
5562
test-files: ${{ steps.set-matrix.outputs.test-files }}
@@ -78,7 +85,7 @@ jobs:
7885
echo "$json_array"
7986
8087
build:
81-
name: ${{ matrix.target.os }} py${{ matrix.target.python_version }} ${{ matrix.test.name }}
88+
name: ${{ matrix.target.os_name }} py${{ matrix.target.python_version }} ${{ matrix.test.name }}
8289
needs: [precheck, discover-standard-tests]
8390
runs-on: ${{ matrix.target.os }}
8491
timeout-minutes: 20
@@ -88,11 +95,16 @@ jobs:
8895
max-parallel: 20
8996
matrix:
9097
target:
91-
- os: ubuntu-latest
98+
- os:
99+
group: Default
100+
os_name: linux
92101
python_version: '3.11'
93-
- os: ubuntu-latest
102+
- os:
103+
group: Default
104+
os_name: linux
94105
python_version: '3.14'
95106
- os: macOS-latest
107+
os_name: macOS
96108
python_version: '3.13'
97109
test: ${{ fromJson(needs.discover-standard-tests.outputs.test-files) }}
98110

@@ -219,7 +231,8 @@ jobs:
219231
220232
discover-live-tests:
221233
needs: precheck
222-
runs-on: ubuntu-latest
234+
runs-on:
235+
group: Default
223236
timeout-minutes: 20
224237
outputs:
225238
live-tests: ${{ steps.set-matrix.outputs.live-tests }}
@@ -239,16 +252,20 @@ jobs:
239252
needs_docker=true
240253
fi
241254
242-
os_targets="ubuntu-latest macOS-latest"
255+
os_targets="linux macOS"
243256
if grep -q "@pytest.mark.docker_required" "$test_file" || grep -q "@pytest.mark.root_required" "$test_file" || grep -q 'skipif("darwin"' "$test_file"; then
244-
os_targets="ubuntu-latest"
257+
os_targets="linux"
245258
elif grep -q 'require_tool("brew")' "$test_file" && ! grep -q 'require_tool("apt-get")' "$test_file" && ! grep -q "operations.apt.packages" "$test_file" && ! grep -q "ansible.builtin.apt" "$test_file"; then
246-
os_targets="macOS-latest"
259+
os_targets="macOS"
247260
fi
248261
249262
for os_target in $os_targets; do
250-
os_name=$(printf '%s' "$os_target" | tr '[:upper:]' '[:lower:]' | sed 's/-latest//')
251-
entry="{\"name\":\"${test_name}-${os_name}\",\"path\":\"$test_file\",\"os\":\"$os_target\",\"needs_docker\":$needs_docker}"
263+
if [ "$os_target" = "linux" ]; then
264+
os_json='{"group":"Default"}'
265+
else
266+
os_json='"macOS-latest"'
267+
fi
268+
entry="{\"name\":\"${test_name}-${os_target}\",\"path\":\"$test_file\",\"os\":${os_json},\"os_name\":\"${os_target}\",\"needs_docker\":$needs_docker}"
252269
if [ "$first" = true ]; then first=false; else json_array+=","; fi
253270
json_array+="$entry"
254271
done

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,7 +1077,8 @@ install_root = $ABXPKG_PUPPETEER_ROOT or $ABXPKG_LIB_DIR/puppeteer
10771077
bin_dir = <install_root>/bin
10781078
```
10791079

1080-
- Install root: set `install_root` for the root dir and `bin_dir` for symlinked executables. Downloaded browser artifacts live under `<install_root>/cache` when an install root is pinned. Leave it unset for ambient/global mode, where cache ownership stays with the host and `INSTALLER_BINARY` must already be resolvable from the ambient provider set.
1080+
- Install root: set `install_root` for the root dir and `bin_dir` for symlinked executables. Leave it unset for ambient/global mode, where cache ownership stays with the host and `INSTALLER_BINARY` must already be resolvable from the ambient provider set.
1081+
- Browser cache: when `install_root` is pinned, abxpkg manages `<install_root>/cache` end-to-end — it is exported as `PUPPETEER_CACHE_DIR` to every subprocess and passed as `--path=` to `puppeteer-browsers install` / `list`, and `uninstall()` resolves the real browser directory via `load()` and then removes it with `rmtree`. When `install_root` is unset, the provider is in pure passthrough mode: the caller's ambient `$PUPPETEER_CACHE_DIR` (or the CLI's `~/.cache/puppeteer` default) flows through to subprocesses unchanged, `load()` trusts whatever path `puppeteer-browsers list` reports, and `uninstall()` still removes (via `rmtree`) the real browser directory returned by `load()` — leaving any unrelated browsers in the shared cache alone.
10811082
- Auto-switching: bootstraps `@puppeteer/browsers` through `NpmProvider` and then uses that CLI for browser installs.
10821083
- `dry_run`: shared behavior.
10831084
- Security: `min_release_age` is unsupported for browser installs and is ignored with a warning if explicitly requested. `postinstall_scripts=False` is supported for the underlying npm bootstrap path, and `ABXPKG_POSTINSTALL_SCRIPTS` hydrates the provider default here.
@@ -1094,18 +1095,19 @@ Source: [`abxpkg/binprovider_playwright.py`](./abxpkg/binprovider_playwright.py)
10941095
```python
10951096
INSTALLER_BIN = "playwright"
10961097
PATH = ""
1097-
install_root = None # when set, doubles as PLAYWRIGHT_BROWSERS_PATH
1098+
install_root = None # abxpkg-managed root dir for bin_dir / nested npm prefix
10981099
bin_dir = <install_root>/bin # symlink dir for resolved browsers
10991100
euid = 0 # routes exec() through sudo-first-then-fallback
11001101
```
11011102

1102-
- Install root: set `install_root` to pin both the abxpkg root dir AND `PLAYWRIGHT_BROWSERS_PATH` to the same directory. Leave it unset to let playwright use its own OS-default browsers path (`~/.cache/ms-playwright` on Linux etc.) — in that case abxpkg maintains no symlink dir or npm prefix at all, the `playwright` npm CLI bootstraps against the host's npm default, and `load()` returns the resolved `executablePath()` directly. `bin_dir` overrides the symlink directory when `install_root` is pinned.
1103+
- Install root: set `install_root` to pin the abxpkg-managed root dir (where `bin_dir` symlinks and the nested npm prefix live). Leave it unset to let playwright use its own OS-default browsers path (`~/.cache/ms-playwright` on Linux etc.) — in that case abxpkg maintains no symlink dir or npm prefix at all, the `playwright` npm CLI bootstraps against the host's npm default, and `load()` returns the resolved `executablePath()` directly. `bin_dir` overrides the symlink directory when `install_root` is pinned.
1104+
- Browser cache: when `install_root` is pinned, abxpkg manages `<install_root>/cache` end-to-end — it is exported as `PLAYWRIGHT_BROWSERS_PATH` to every subprocess (including the `env KEY=VAL -- ...` wrapper used when commands go through sudo) and used to scope `executablePath()` hits on `load()`, and `uninstall()` resolves the real browser directory via `load()` and then removes it with `rmtree`. When `install_root` is unset, the provider is in pure passthrough mode: the caller's ambient `$PLAYWRIGHT_BROWSERS_PATH` (or playwright's `~/.cache/ms-playwright` default on Linux) flows through to subprocesses unchanged, `load()` trusts whatever path `executablePath()` reports, and `uninstall()` still removes (via `rmtree`) the real browser directory returned by `load()`.
11031105
- Auto-switching: bootstraps the `playwright` npm package through `NpmProvider`, then runs `playwright install --with-deps <install_args>` against it. Resolves each installed browser's real executable via the `playwright-core` Node.js API (`chromium.executablePath()` etc.) and writes a symlink into `bin_dir` when one is configured.
11041106
- `dry_run`: shared behavior — the install handler short-circuits to a placeholder without touching the host.
11051107
- Privilege handling: `--with-deps` installs system packages and requires root on Linux. `euid` defaults to `0`, which routes every `exec()` call through the base `BinProvider.exec` sudo-first-then-fallback path — it tries `sudo -n -- playwright install --with-deps ...` first on non-root hosts, falls back to running the command directly if sudo fails or isn't available, and merges both stderr outputs into the final error if both attempts fail.
11061108
- Security: `min_release_age` and `postinstall_scripts=False` are unsupported for browser installs and are ignored with a warning if explicitly requested.
11071109
- Overrides: `install_args` are appended onto `playwright install` after `playwright_install_args` (defaults to `["--with-deps"]`) and passed through verbatim — use whatever browser names / flags the `playwright install` CLI accepts (`chromium`, `firefox`, `webkit`, `--no-shell`, `--only-shell`, `--force`, etc.).
1108-
- Notes: `update()` bumps the `playwright` npm package in `install_root` first (via `NpmProvider.update`) so its pinned browser versions refresh, then re-runs `playwright install --force <install_args>` to pull any new browser builds. `uninstall()` removes the relevant `<bin_name>-*/` directories from `install_root` alongside the bin-dir symlink, since `playwright uninstall` only drops *unused* browsers on its own. Both `update()` and `uninstall()` leave playwright's OS-default cache untouched when `install_root` is unset.
1110+
- Notes: `update()` bumps the `playwright` npm package in `install_root` first (via `NpmProvider.update`) so its pinned browser versions refresh, then re-runs `playwright install --force <install_args>` to pull any new browser builds. `uninstall()` resolves the browser's real install directory via `playwright-core`'s `executablePath()`, walks up to the containing `<bin_name>-<buildId>/` dir, and rmtrees that dir — in both managed and passthrough modes — because `playwright uninstall` itself has no per-browser argument and only drops *unused* browsers wholesale.
11091111

11101112
</details>
11111113

abxpkg/binprovider_apt.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,14 @@ class AptProvider(BinProvider):
2828

2929
def setup_PATH(self, no_cache: bool = False) -> None:
3030
"""Populate PATH on first use from dpkg-discovered package runtime bin dirs, not from apt-get itself."""
31-
if no_cache or (
32-
self._INSTALLER_BINARY is None
31+
# Rebuild PATH on first use, when the caller forces no_cache, or when
32+
# PATH is still empty — the last case covers the "INSTALLER_BINARY was
33+
# resolved out-of-band (hook preflight etc.), so _INSTALLER_BINARY is
34+
# non-None but self.PATH was never populated" race.
35+
if (
36+
no_cache
37+
or not self.PATH
38+
or self._INSTALLER_BINARY is None
3339
or self._INSTALLER_BINARY.loaded_abspath is None
3440
):
3541
dpkg_binary = EnvProvider().load("dpkg")

abxpkg/binprovider_brew.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -183,15 +183,29 @@ def _refresh_bin_link(
183183
except Exception:
184184
break
185185
walk_path = walk_path.parent
186+
# Idempotent refresh: skip when shim already points at target.
187+
# Rewriting on every load() bumps mtime and churns the inode,
188+
# which invalidates fingerprint caches unnecessarily.
189+
if link_path.is_symlink():
190+
try:
191+
if link_path.readlink() == Path(target):
192+
return TypeAdapter(HostBinPath).validate_python(link_path)
193+
except OSError:
194+
pass
186195
if link_path.exists() or link_path.is_symlink():
187196
link_path.unlink(missing_ok=True)
188197
link_path.symlink_to(target)
189198
return TypeAdapter(HostBinPath).validate_python(link_path)
190199

191200
def setup_PATH(self, no_cache: bool = False) -> None:
192201
"""Populate PATH on first use from the resolved brew prefix and known runtime brew bin dirs."""
193-
if no_cache or (
194-
self._INSTALLER_BINARY is None
202+
# Rebuild PATH on first use, when the caller forces no_cache, or when
203+
# PATH is still empty — the last case covers provider copies that
204+
# inherited a resolved ``_INSTALLER_BINARY`` but an unset ``PATH``.
205+
if (
206+
no_cache
207+
or not self.PATH
208+
or self._INSTALLER_BINARY is None
195209
or self._INSTALLER_BINARY.loaded_abspath is None
196210
):
197211
install_root = self.install_root
@@ -391,12 +405,12 @@ def default_abspath_handler(
391405
if not self.PATH:
392406
return None
393407

408+
# Authoritative lookup: search brew's own Cellar / opt / PATH
409+
# entries for the real formula binary. The managed ``bin_dir``
410+
# shim is a convenience side-effect of install — never a source
411+
# of truth — so we always consult brew's paths first and only
412+
# refresh the shim to match the freshly-resolved target.
394413
linked_bin = self._linked_bin_path(bin_name)
395-
if linked_bin is not None:
396-
linked_abspath = bin_abspath(bin_name, PATH=str(self.bin_dir))
397-
if linked_abspath:
398-
return linked_abspath
399-
400414
search_paths = self._brew_search_paths(bin_name, no_cache=no_cache)
401415
abspath = bin_abspath(bin_name, PATH=search_paths)
402416
if abspath:

abxpkg/binprovider_goget.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,15 @@ def default_abspath_handler(
294294
assert bin_dir is not None
295295
link_path = bin_dir / str(bin_name)
296296
link_path.parent.mkdir(parents=True, exist_ok=True)
297+
# Idempotent refresh: skip when shim already points at target.
298+
# Rewriting on every load() bumps mtime and churns the inode,
299+
# which invalidates fingerprint caches unnecessarily.
300+
if link_path.is_symlink():
301+
try:
302+
if link_path.readlink() == Path(direct_abspath):
303+
return TypeAdapter(HostBinPath).validate_python(link_path)
304+
except OSError:
305+
pass
297306
if link_path.exists() or link_path.is_symlink():
298307
link_path.unlink(missing_ok=True)
299308
link_path.symlink_to(direct_abspath)

abxpkg/binprovider_npm.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,15 @@ def _refresh_bin_link(
377377
link_path = self._linked_bin_path(bin_name)
378378
assert link_path is not None, "_refresh_bin_link requires bin_dir to be set"
379379
link_path.parent.mkdir(parents=True, exist_ok=True)
380+
# Idempotent refresh: skip when shim already points at target.
381+
# Rewriting on every load() bumps mtime and churns the inode,
382+
# which invalidates fingerprint caches unnecessarily.
383+
if link_path.is_symlink():
384+
try:
385+
if link_path.readlink() == Path(target):
386+
return TypeAdapter(HostBinPath).validate_python(link_path)
387+
except OSError:
388+
pass
380389
if link_path.exists() or link_path.is_symlink():
381390
link_path.unlink(missing_ok=True)
382391
link_path.symlink_to(target)

0 commit comments

Comments
 (0)