From 5fbb36a3eba892cd1183368e77e051170e3d87bc Mon Sep 17 00:00:00 2001 From: Kevin Peterson Date: Wed, 3 Jun 2026 22:57:58 -0400 Subject: [PATCH] feat: add public HTTPS sandbox egress mode --- .env.example | 9 +++-- docs/CONFIGURATION.md | 7 +++- src/config/__init__.py | 11 +++++- src/main.py | 2 + src/services/sandbox/egress_proxy.py | 25 +++++++++--- tests/unit/test_egress_proxy.py | 57 ++++++++++++++++++++++++++++ 6 files changed, 99 insertions(+), 12 deletions(-) diff --git a/.env.example b/.env.example index 3bca089..9f0cbc6 100644 --- a/.env.example +++ b/.env.example @@ -19,12 +19,13 @@ API_KEY=your-secure-api-key-here-change-this-in-production # 3. (none, when AUTH_ENABLED=false) # ── Sandbox network access (skill installs) ─────────────────── -# When ENABLE_SANDBOX_NETWORK=true, sandboxes can reach the internet but only -# through an inline allowlist proxy that permits PyPI, npm, Go modules, and -# crates.io. Required for skills that pip/npm/go install dependencies at -# runtime. Off by default (sandboxes are isolated). +# When ENABLE_SANDBOX_NETWORK=true, sandboxes can reach the internet through +# an inline proxy. Default allowlist mode permits PyPI, npm, Go modules, and +# crates.io. Use public_https mode to permit arbitrary public HTTPS endpoints +# while still blocking private/link-local/internal addresses. # # ENABLE_SANDBOX_NETWORK=false +# SANDBOX_EGRESS_MODE=allowlist # allowlist or public_https # SANDBOX_EGRESS_PORT=18443 # local-only, sandbox -> proxy # SANDBOX_EGRESS_ALLOWLIST= # comma-separated extra hosts # SKILL_DEPS_PATH=/opt/skill-deps # backing volume mount diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 761dd80..0cd3eaa 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -270,13 +270,16 @@ Inactive states are automatically archived to S3 for long-term storage. 
| `ENABLE_NETWORK_ISOLATION` | `true` | Enable network isolation for sandboxes | | `ENABLE_FILESYSTEM_ISOLATION` | `true` | Enable filesystem isolation | -### Sandbox Network Access (Skill Installs) +### Sandbox Network Access -Off by default — sandboxes have no network access. When enabled, an inline allowlist HTTPS proxy on `127.0.0.1` lets sandboxes reach **only** package registries (PyPI, npm, Go modules, crates.io). Required for "skills" that `pip install` / `npm install` / `go get` / `cargo install` dependencies at runtime. +Off by default — sandboxes have no network access. When enabled, an inline HTTPS proxy on `127.0.0.1` lets sandboxes reach external hosts without giving sandbox processes direct network access. The default `allowlist` mode permits only package registries (PyPI, npm, Go modules, crates.io) plus any hostnames in `SANDBOX_EGRESS_ALLOWLIST`. This is required for "skills" that `pip install` / `npm install` / `go get` / `cargo install` dependencies at runtime. + +For agent workflows that need to retrieve data from public APIs, set `SANDBOX_EGRESS_MODE=public_https`. Public HTTPS mode permits arbitrary public HTTPS hostnames on port `443`, while still rejecting private, loopback, link-local, reserved, multicast, and unspecified IP addresses. The iptables egress firewall remains active, so sandbox processes still cannot bypass the proxy with direct sockets. 
| Variable | Default | Description | | -------------------------- | --------------------- | --------------------------------------------------------------------------------- | | `ENABLE_SANDBOX_NETWORK` | `false` | Allow sandboxes to reach the internet via the inline allowlist proxy | +| `SANDBOX_EGRESS_MODE` | `allowlist` | Egress mode: `allowlist` or `public_https` | | `SANDBOX_EGRESS_PORT` | `18443` | Port the inline egress proxy binds to on `127.0.0.1` | | `SANDBOX_EGRESS_ALLOWLIST` | (registries default) | Comma-separated list of additional hostnames the proxy permits | | `SKILL_DEPS_PATH` | `/opt/skill-deps` | Host-side directory mounted into every sandbox so install caches compound across runs | diff --git a/src/config/__init__.py b/src/config/__init__.py index 52f367c..110139f 100644 --- a/src/config/__init__.py +++ b/src/config/__init__.py @@ -20,7 +20,7 @@ import secrets from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Literal, Optional import structlog from pydantic import Field, validator @@ -111,6 +111,15 @@ class Settings(BaseSettings): "everything else is refused." ), ) + sandbox_egress_mode: Literal["allowlist", "public_https"] = Field( + default="allowlist", + description=( + "Sandbox egress proxy mode. 'allowlist' permits default package " + "registries plus SANDBOX_EGRESS_ALLOWLIST hosts. 'public_https' " + "permits arbitrary public HTTPS hosts while still blocking " + "private, loopback, link-local, reserved, and multicast IPs." 
+ ), + ) sandbox_egress_port: int = Field( default=18443, ge=1024, diff --git a/src/main.py b/src/main.py index 9df350e..fca1a64 100644 --- a/src/main.py +++ b/src/main.py @@ -177,6 +177,7 @@ async def _startup_egress_proxy(app: FastAPI) -> None: proxy = EgressProxy( port=settings.sandbox_egress_port, allowlist=list(DEFAULT_ALLOWLIST) + extra, + allow_public_https=settings.sandbox_egress_mode == "public_https", ) await proxy.start() app.state.egress_proxy = proxy @@ -207,6 +208,7 @@ async def _startup_egress_proxy(app: FastAPI) -> None: logger.info( "Sandbox network access ENABLED via egress proxy + firewall", port=settings.sandbox_egress_port, + egress_mode=settings.sandbox_egress_mode, skill_deps_path=str(deps_root), sandbox_uid=sandbox_uid, allowlist_extra=extra or None, diff --git a/src/services/sandbox/egress_proxy.py b/src/services/sandbox/egress_proxy.py index 4f87bfa..823c687 100644 --- a/src/services/sandbox/egress_proxy.py +++ b/src/services/sandbox/egress_proxy.py @@ -10,11 +10,14 @@ - Refuses to open tunnels to private IP ranges (RFC 1918, loopback, link-local) even if a public hostname resolves to one. This stops trivial SSRF against Redis/S3/etc. on the same docker network. -- Refuses any request whose host doesn't match the allowlist. +- Refuses any request whose host doesn't match the allowlist, unless + public HTTPS mode is explicitly enabled (then only port 443 is allowed). Allowlist defaults cover Python (PyPI), Node (npmjs), Go modules, and Rust crates so `pip install`, `npm install`, `go get`, `cargo add` work -out of the box. Add more via SANDBOX_EGRESS_ALLOWLIST=host1,host2. +out of the box. Add more via SANDBOX_EGRESS_ALLOWLIST=host1,host2. Operators +can set SANDBOX_EGRESS_MODE=public_https to permit arbitrary public HTTPS +hosts on port 443 while retaining private/link-local IP blocking. 
""" from __future__ import annotations @@ -145,9 +148,11 @@ def __init__( port: int, allowlist: Iterable[str] = DEFAULT_ALLOWLIST, bind_host: str = "127.0.0.1", + allow_public_https: bool = False, ): self.port = port self.bind_host = bind_host + self.allow_public_https = allow_public_https self.allowlist: Set[str] = {h.strip().lower() for h in allowlist if h.strip()} self._server: Optional[asyncio.base_events.Server] = None self._serve_task: Optional[asyncio.Task] = None @@ -164,6 +169,7 @@ async def start(self) -> None: logger.info( "Sandbox egress proxy started", bind=f"{self.bind_host}:{self.port}", + mode="public_https" if self.allow_public_https else "allowlist", allowlist_size=len(self.allowlist), ) @@ -235,15 +241,24 @@ async def _handle_client( return host = _normalize_host(host) - # Allowlist check on the host *before* we resolve it, so audit logs show - # the requested host even when DNS would have failed. + # Host checks run *before* DNS so audit logs show the requested host even + # when resolution would fail. 
if _is_private_ip(host): logger.warning( "Egress proxy refused private IP literal", host=host, peer=peer ) await self._reply_and_close(client_writer, 403, "Forbidden") return - if not _matches_allowlist(host, self.allowlist): + if self.allow_public_https and port != 443: + logger.warning( + "Egress proxy refused public HTTPS request on non-HTTPS port", + host=host, + port=port, + peer=peer, + ) + await self._reply_and_close(client_writer, 403, "Forbidden") + return + if not self.allow_public_https and not _matches_allowlist(host, self.allowlist): logger.warning( "Egress proxy refused non-allowlisted host", host=host, peer=peer ) diff --git a/tests/unit/test_egress_proxy.py b/tests/unit/test_egress_proxy.py index f53c1ea..dc77445 100644 --- a/tests/unit/test_egress_proxy.py +++ b/tests/unit/test_egress_proxy.py @@ -118,6 +118,63 @@ async def test_private_ip_literal_returns_403(): await proxy.stop() +@pytest.mark.asyncio +async def test_public_https_mode_accepts_non_allowlisted_host(): + """Public HTTPS mode should permit arbitrary public hostnames. + + The test uses an unresolvable hostname so a successful allow check produces + 502 from DNS resolution, not 403 from the allowlist. 
+ """ + port = _free_port() + proxy = EgressProxy( + port=port, + allowlist={"good.test"}, + allow_public_https=True, + ) + await proxy.start() + try: + status, _r, w = await _send_connect(port, "arbitrary-public-api.test:443") + w.close() + assert b"403" not in status, status + assert b"502" in status, status + finally: + await proxy.stop() + + +@pytest.mark.asyncio +async def test_public_https_mode_still_rejects_private_ip_literal(): + port = _free_port() + proxy = EgressProxy( + port=port, + allowlist={"good.test"}, + allow_public_https=True, + ) + await proxy.start() + try: + status, _r, w = await _send_connect(port, "10.0.0.1:443") + w.close() + assert b"403" in status, status + finally: + await proxy.stop() + + +@pytest.mark.asyncio +async def test_public_https_mode_rejects_non_https_ports(): + port = _free_port() + proxy = EgressProxy( + port=port, + allowlist={"good.test"}, + allow_public_https=True, + ) + await proxy.start() + try: + status, _r, w = await _send_connect(port, "arbitrary-public-api.test:22") + w.close() + assert b"403" in status, status + finally: + await proxy.stop() + + @pytest.mark.asyncio async def test_loopback_literal_returns_403(): port = _free_port()