From 139f1ca644d28990216342710c80dcf00aa67532 Mon Sep 17 00:00:00 2001 From: Pratyush Sharma <56130065+pratyush618@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:54:42 +0530 Subject: [PATCH 1/6] fix(sandbox): require bearer-token auth on embedded FastAPI routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sandbox's embedded server exposed /api/bash, /api/read, /api/write, /api/edit, /api/kill, /api/exists, /api/health without validating the Authorization header the client already sends. Anyone who could reach the public *.hf.space URL could execute arbitrary commands inside the user's sandbox — and read HF_TOKEN from the environment. Add an app-wide FastAPI dependency that reads HF_TOKEN at startup and matches it against the Bearer token on every request using hmac.compare_digest. Fail closed: if HF_TOKEN is unset, every request 401s rather than silently becoming open. Refs huggingface/ml-intern#78 --- agent/tools/sandbox_client.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/agent/tools/sandbox_client.py b/agent/tools/sandbox_client.py index 16982c76..32c56a7c 100644 --- a/agent/tools/sandbox_client.py +++ b/agent/tools/sandbox_client.py @@ -99,8 +99,8 @@ _SANDBOX_SERVER = '''\ """Minimal FastAPI server for sandbox operations.""" -import os, subprocess, pathlib, signal, threading, re, tempfile -from fastapi import FastAPI +import os, subprocess, pathlib, signal, threading, re, tempfile, hmac +from fastapi import FastAPI, HTTPException, Header, Depends from pydantic import BaseModel from typing import Optional import uvicorn @@ -154,7 +154,19 @@ def _atomic_write(path: pathlib.Path, content: str): except OSError: pass -app = FastAPI() +_AUTH_TOKEN = os.environ.get("HF_TOKEN", "") + +def require_auth(authorization: Optional[str] = Header(None)): + # Fail closed: if the sandbox secret isn't set, every request 401s + # rather than silently becoming open again. + if not _AUTH_TOKEN: + raise HTTPException(status_code=401, detail="unauthorized") + if not authorization or not authorization.startswith("Bearer "): + raise HTTPException(status_code=401, detail="unauthorized") + if not hmac.compare_digest(authorization[len("Bearer "):], _AUTH_TOKEN): + raise HTTPException(status_code=401, detail="unauthorized") + +app = FastAPI(dependencies=[Depends(require_auth)]) # Track active bash processes so they can be killed on cancel _active_procs = {} # pid -> subprocess.Popen From 601e3c20f68762f0e017f83760bdf0103e0c8d2e Mon Sep 17 00:00:00 2001 From: Pratyush Sharma <56130065+pratyush618@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:54:50 +0530 Subject: [PATCH 2/6] test(sandbox): cover bearer-token auth on embedded server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six cases against the _SANDBOX_SERVER source loaded via importlib: - missing Authorization header → 401 - wrong Bearer token → 401 - correct Bearer token → 200 - /api/bash with wrong token must 401 before subprocess.Popen is called - fail-closed when HF_TOKEN is unset → every request 401s - /api/write spot-check to confirm the app-wide dependency covers POST routes and the target file is never written --- tests/unit/test_sandbox_server_auth.py | 86 ++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 tests/unit/test_sandbox_server_auth.py diff --git a/tests/unit/test_sandbox_server_auth.py b/tests/unit/test_sandbox_server_auth.py new file mode 100644 index 00000000..bea482bc --- /dev/null +++ b/tests/unit/test_sandbox_server_auth.py @@ -0,0 +1,86 @@ +"""Tests for the embedded sandbox FastAPI server's bearer-token auth (issue #78).""" + +import importlib.util +import subprocess + +from fastapi.testclient import TestClient + +from agent.tools.sandbox_client import _SANDBOX_SERVER + + +def _load_server(tmp_path, monkeypatch, token): + """Write the embedded server source to disk and importlib-load it. + + Module-level `_AUTH_TOKEN` is bound at import time from `os.environ`, so + `monkeypatch.setenv` before import is what makes each test isolated. + """ + monkeypatch.setenv("HF_TOKEN", token) + path = tmp_path / "sandbox_server.py" + path.write_text(_SANDBOX_SERVER) + spec = importlib.util.spec_from_file_location("sandbox_server_under_test", str(path)) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_missing_authorization_header_rejects(tmp_path, monkeypatch): + mod = _load_server(tmp_path, monkeypatch, "secret-xyz") + client = TestClient(mod.app) + assert client.get("/api/health").status_code == 401 + + +def test_bearer_wrong_token_rejects(tmp_path, monkeypatch): + mod = _load_server(tmp_path, monkeypatch, "secret-xyz") + client = TestClient(mod.app) + r = client.get("/api/health", headers={"Authorization": "Bearer wrong"}) + assert r.status_code == 401 + + +def test_bearer_correct_token_passes(tmp_path, monkeypatch): + mod = _load_server(tmp_path, monkeypatch, "secret-xyz") + client = TestClient(mod.app) + r = client.get("/api/health", headers={"Authorization": "Bearer secret-xyz"}) + assert r.status_code == 200 + assert r.json() == {"status": "ok"} + + +def test_bash_unauthenticated_never_executes(tmp_path, monkeypatch): + """/api/bash must 401 before subprocess.Popen is invoked.""" + mod = _load_server(tmp_path, monkeypatch, "secret-xyz") + + def _fail(*_a, **_kw): + raise AssertionError("subprocess.Popen invoked without auth") + + monkeypatch.setattr(subprocess, "Popen", _fail) + client = TestClient(mod.app) + r = client.post( + "/api/bash", + headers={"Authorization": "Bearer wrong"}, + json={"command": "id", "work_dir": "/app", "timeout": 10}, + ) + assert r.status_code == 401 + + +def test_fail_closed_when_hf_token_unset(tmp_path, monkeypatch): + """With no HF_TOKEN in the env, every request must 401 — including ones + that present an empty Bearer value.""" + mod = _load_server(tmp_path, monkeypatch, "") + client = TestClient(mod.app) + assert client.get("/api/health").status_code == 401 + r = client.get("/api/health", headers={"Authorization": "Bearer "}) + assert r.status_code == 401 + + +def test_write_endpoint_also_protected(tmp_path, monkeypatch): + """Spot-check that POST routes beyond /api/bash are covered by the + app-wide dependency (write/edit/read/kill/exists all share it).""" + mod = _load_server(tmp_path, monkeypatch, "secret-xyz") + client = TestClient(mod.app) + target = tmp_path / "should_not_exist.txt" + r = client.post( + "/api/write", + headers={"Authorization": "Bearer wrong"}, + json={"path": str(target), "content": "pwned"}, + ) + assert r.status_code == 401 + assert not target.exists() From 5603f8667699d89313ffab23b5b8f6ad1e625f6e Mon Sep 17 00:00:00 2001 From: Pratyush Sharma <56130065+pratyush618@users.noreply.github.com> Date: Thu, 23 Apr 2026 23:55:03 +0530 Subject: [PATCH 3/6] fix(sandbox): default new sandboxes to private Defense in depth alongside the server-side bearer-token check: make Sandbox.create(..., private=True) the default so the HF Space platform itself also gates access to the *.hf.space URL. The client already sends the user's HF_TOKEN in the Bearer header, which is what HF's gateway validates for private-space access, so existing flows stay working. sandbox_create_handler only forwards 'private' when the caller explicitly passes it, so this default propagates cleanly without any handler change. --- agent/tools/sandbox_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/tools/sandbox_client.py b/agent/tools/sandbox_client.py index 32c56a7c..bf47e298 100644 --- a/agent/tools/sandbox_client.py +++ b/agent/tools/sandbox_client.py @@ -528,7 +528,7 @@ def create( name: str | None = None, template: str = TEMPLATE_SPACE, hardware: str = "cpu-basic", - private: bool = False, + private: bool = True, sleep_time: int | None = None, token: str | None = None, secrets: dict[str, str] | None = None, From 909c450b6312cdcff7facb4579a2ae74179bfd68 Mon Sep 17 00:00:00 2001 From: Pratyush Sharma <56130065+pratyush618@users.noreply.github.com> Date: Fri, 24 Apr 2026 19:46:29 +0530 Subject: [PATCH 4/6] fix(sandbox): fail fast in _wait_for_api when auth is rejected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this change, a reachable sandbox server that returned 401/403 on /api/health (e.g. because HF_TOKEN was not set as a Space secret, or the client and server tokens don't match) would cause the health-poll loop to spin for the full API_WAIT_TIMEOUT and then raise "not responding" — a misleading error when the real cause is auth. Short-circuit on 401/403 with a RuntimeError that points at the likely misconfiguration. TimeoutError still fires for genuine unreachability. --- agent/tools/sandbox_client.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/agent/tools/sandbox_client.py b/agent/tools/sandbox_client.py index bf47e298..7b5281b9 100644 --- a/agent/tools/sandbox_client.py +++ b/agent/tools/sandbox_client.py @@ -682,6 +682,17 @@ def _wait_for_api(self, timeout: int = API_WAIT_TIMEOUT, log: Callable[[str], ob if resp.status_code == 200: log(f"API is responsive at {self._base_url}") return + # A reachable server that rejects auth will keep returning + # 401/403 for the full timeout — fail fast with a clear message + # instead of looping until TimeoutError hides the real cause. + if resp.status_code in (401, 403): + raise RuntimeError( + f"Sandbox API at {self._base_url} rejected auth " + f"(HTTP {resp.status_code}). Check that HF_TOKEN is set " + f"as a Space secret and matches the client token." + ) + except RuntimeError: + raise except Exception as e: last_err = e time.sleep(3) From 24c5650b4c6dc1116725085f49e45d2267d019df Mon Sep 17 00:00:00 2001 From: Pratyush Sharma <56130065+pratyush618@users.noreply.github.com> Date: Fri, 24 Apr 2026 19:46:51 +0530 Subject: [PATCH 5/6] docs(sandbox): reflect private=true default in sandbox_create tool spec The 'private' parameter description ("If true, create a private Space") implied false was the default. Since Sandbox.create now defaults to private=True, the schema an LLM sees should match. Update the description to state the true default and how to opt out. --- agent/tools/sandbox_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/tools/sandbox_tool.py b/agent/tools/sandbox_tool.py index 74c6a788..cc67504b 100644 --- a/agent/tools/sandbox_tool.py +++ b/agent/tools/sandbox_tool.py @@ -193,7 +193,7 @@ async def _watch_cancel(): }, "private": { "type": "boolean", - "description": "If true, create a private Space", + "description": "Whether the Space is private (default: true). Set false to create a public Space.", }, }, }, From 2f3bcaadd024c77ca6a1657474be666079feef82 Mon Sep 17 00:00:00 2001 From: Pratyush Sharma <56130065+pratyush618@users.noreply.github.com> Date: Fri, 24 Apr 2026 19:47:41 +0530 Subject: [PATCH 6/6] test(sandbox): add positive-path bash-with-valid-auth case The auth tests so far only cover the rejection paths. Add a matching positive case that confirms /api/bash actually runs a command and returns its stdout when the Bearer token is correct, so a regression that broke the happy path (e.g. require_auth accidentally rejecting valid tokens) would show up as a test failure instead of only manifesting at runtime. --- tests/unit/test_sandbox_server_auth.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/unit/test_sandbox_server_auth.py b/tests/unit/test_sandbox_server_auth.py index bea482bc..e8a4d3b5 100644 --- a/tests/unit/test_sandbox_server_auth.py +++ b/tests/unit/test_sandbox_server_auth.py @@ -84,3 +84,19 @@ def test_write_endpoint_also_protected(tmp_path, monkeypatch): ) assert r.status_code == 401 assert not target.exists() + + +def test_bash_with_valid_auth_executes(tmp_path, monkeypatch): + """Positive-path check: with the correct Bearer, /api/bash actually runs + the command and returns its output. Balances the auth-only negative tests.""" + mod = _load_server(tmp_path, monkeypatch, "secret-xyz") + client = TestClient(mod.app) + r = client.post( + "/api/bash", + headers={"Authorization": "Bearer secret-xyz"}, + json={"command": "echo hello-sandbox", "work_dir": str(tmp_path), "timeout": 10}, + ) + assert r.status_code == 200 + payload = r.json() + assert payload["success"] is True + assert "hello-sandbox" in payload["output"]