Skip to content

Commit 6568d5f

Browse files
nancyjlau and ryantzr1 committed
Update Harbor converter runtime isolation
Co-authored-by: Ryan Tan <63581031+ryantzr1@users.noreply.github.com>
1 parent a8fb83b commit 6568d5f

3 files changed

Lines changed: 257 additions & 20 deletions

File tree

hud/cli/convert/__init__.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313

1414
import json
1515
import logging
16+
import re
1617
import shutil
1718
from pathlib import Path
1819

@@ -36,6 +37,17 @@
3637

3738
# Shell script extensions that need CRLF -> LF normalization
3839
_SHELL_EXTENSIONS = frozenset({".sh", ".bash", ".zsh", ".ksh"})
40+
_SOURCE_CONTEXT_MARKDOWN_RE = re.compile(r"^[a-z]{2,8}-\d+(?:-\d+)*\.md$", re.IGNORECASE)
41+
42+
43+
def _should_skip_task_data_item(path: Path) -> bool:
44+
"""Return True for source task files that should not be exposed to agents."""
45+
lower_name = path.name.lower()
46+
return (
47+
path.name in ("environment", "solution")
48+
or lower_name in (".dockerignore", "scoring.md")
49+
or bool(_SOURCE_CONTEXT_MARKDOWN_RE.match(path.name))
50+
)
3951

4052

4153
def _normalize_line_endings(directory: Path) -> None:
@@ -154,8 +166,8 @@ def write_result(result: ConvertResult, output_dir: Path) -> Path:
154166
dest.mkdir(parents=True, exist_ok=True)
155167

156168
for item in source_dir.iterdir():
157-
# Skip dirs that are handled by the Dockerfile or ignored
158-
if item.name in ("environment", "solution"):
169+
# Skip dirs handled elsewhere and files that leak scoring/source context.
170+
if _should_skip_task_data_item(item):
159171
continue
160172
if item.is_dir():
161173
shutil.copytree(item, dest / item.name)

hud/cli/convert/harbor.py

Lines changed: 148 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import hashlib
3434
import logging
3535
import re
36+
import shlex
3637
import tomllib
3738
from dataclasses import dataclass
3839
from pathlib import Path # noqa: TC003 - used at runtime
@@ -76,6 +77,55 @@ def _normalize_name(name: str) -> str:
7677
return normalized.strip("-") or "converted"
7778

7879

80+
def _docker_instruction_name(line: str) -> str | None:
81+
"""Return the Dockerfile instruction name for *line*, if it has one."""
82+
stripped = line.strip()
83+
if not stripped or stripped.startswith("#"):
84+
return None
85+
return stripped.split(maxsplit=1)[0].upper()
86+
87+
88+
def _docker_instruction_value(line: str) -> str:
89+
"""Return the remainder of a Dockerfile instruction line."""
90+
parts = line.strip().split(maxsplit=1)
91+
return parts[1] if len(parts) > 1 else ""
92+
93+
94+
def _extract_workdir(content: str) -> str:
    """Return the effective last Dockerfile WORKDIR, defaulting to /app.

    Every ``WORKDIR`` instruction in *content* is visited in order and the
    last one with a value wins. A relative value is resolved against the
    previous ``WORKDIR`` of the same build stage, as Docker does, so
    ``WORKDIR /a`` followed by ``WORKDIR b`` yields ``/a/b`` rather than ``b``.

    The following values are returned verbatim because they cannot be
    resolved from the Dockerfile text alone:

    * absolute paths,
    * values starting with ``$`` (variable references),
    * quoted values,
    * a relative value that is the first ``WORKDIR`` of its stage -- its
      real base is the base image's working directory, which is unknown here.

    Args:
        content: Full text of a Dockerfile (may be empty).

    Returns:
        The resolved working directory, or ``"/app"`` when no ``WORKDIR``
        instruction with a value is present.
    """
    workdir = "/app"
    # Last WORKDIR seen in the *current* build stage; relative paths chain onto it.
    stage_anchor = None
    for line in content.splitlines():
        instruction = _docker_instruction_name(line)
        if instruction == "FROM":
            # A new stage starts from its base image's workdir, not from the
            # previous stage's, so stop chaining relative paths across stages.
            stage_anchor = None
            continue
        if instruction != "WORKDIR":
            continue
        value = _docker_instruction_value(line)
        if not value:
            continue
        is_relative = not value.startswith(("/", "$", '"', "'"))
        if is_relative and stage_anchor is not None:
            value = f"{stage_anchor.rstrip('/')}/{value}"
        stage_anchor = workdir = value
    return workdir
104+
105+
106+
def _make_task_slug(task_id: str, used_slugs: set[str]) -> str:
    """Derive a unique HUD task slug from a Harbor task id and register it.

    The slug starts as the normalized task id. A normalized name longer than
    100 characters is cut to 91 characters (trailing ``-`` removed) and
    suffixed with an 8-hex-digit SHA-256 prefix of *task_id*. If the slug is
    already in *used_slugs*, the digest-suffixed form is tried next, and after
    that ``<prefix>-<digest>-<n>`` for ``n = 2, 3, ...`` until an unused slug
    is found.

    Args:
        task_id: Harbor task identifier.
        used_slugs: Slugs handed out so far; the chosen slug is added to it.

    Returns:
        The slug that was added to *used_slugs*.
    """
    stem = _normalize_name(task_id)
    fingerprint = hashlib.sha256(task_id.encode()).hexdigest()[:8]

    # Over-long names are shortened up front: 91 chars + "-" + 8 hex digits.
    if len(stem) > 100:
        stem = stem[:91].rstrip("-") + "-" + fingerprint

    # First collision fallback: append the digest to the (possibly cut) stem.
    candidate = stem
    if candidate in used_slugs:
        candidate = stem[:91].rstrip("-") + "-" + fingerprint

    # Further collisions: add a counter after the digest, trimming the stem
    # so the prefix plus "-" plus the tail stays within the same length budget.
    attempt = 2
    while candidate in used_slugs:
        tail = f"{fingerprint}-{attempt}"
        head = stem[: 99 - len(tail)].rstrip("-")
        candidate = f"{head}-{tail}"
        attempt += 1

    used_slugs.add(candidate)
    return candidate
127+
128+
79129
def _find_dockerfile(env_dir: Path) -> str | None:
80130
"""Read the Dockerfile from a Harbor environment directory."""
81131
for name in ("Dockerfile", "dockerfile"):
@@ -92,8 +142,20 @@ def _adapt_harbor_dockerfile(content: str) -> str:
92142
"""
93143
lines = content.splitlines()
94144
adapted: list[str] = []
145+
in_healthcheck_continuation = False
95146
for line in lines:
96147
stripped = line.strip().upper()
148+
149+
if stripped.startswith("HEALTHCHECK "):
150+
adapted.append(line)
151+
in_healthcheck_continuation = line.rstrip().endswith("\\")
152+
continue
153+
154+
if in_healthcheck_continuation:
155+
adapted.append(line)
156+
in_healthcheck_continuation = line.rstrip().endswith("\\")
157+
continue
158+
97159
if stripped.startswith(("CMD ", "CMD[", "ENTRYPOINT ", "ENTRYPOINT[")):
98160
adapted.append(f"# [harbor original] {line}")
99161
else:
@@ -167,26 +229,64 @@ def _parse_task(task_dir: Path) -> HarborTask | None:
167229
168230
import json
169231
import logging
232+
import os
170233
import subprocess
171234
from pathlib import Path
172235
{extra_imports}
173236
from hud import Environment
174237
from hud.tools import BashTool, EditTool
175238
from hud.tools.filesystem import GlobTool, GrepTool, ListTool, ReadTool
239+
from hud.tools.types import ToolError
176240
177241
LOGGER = logging.getLogger(__name__)
178242
179-
TASKS_DIR = Path("/harbor/tasks")
243+
TASKS_DIR = Path("/root/.hud_harbor/tasks")
244+
AGENT_WORKDIR = os.path.expandvars({agent_workdir!r})
245+
246+
247+
def _set_agent_workdir() -> None:
248+
"""Put agent shell sessions in the original Harbor challenge workdir."""
249+
try:
250+
os.chdir(AGENT_WORKDIR)
251+
except FileNotFoundError:
252+
if TASKS_DIR.exists():
253+
LOGGER.warning("Agent workdir does not exist: %s", AGENT_WORKDIR)
254+
else:
255+
LOGGER.debug("Skipping container workdir on host import: %s", AGENT_WORKDIR)
256+
257+
258+
_set_agent_workdir()
259+
260+
261+
def _resolve_within_base(file_path: Path, base_path: Path) -> Path:
262+
resolved = file_path.resolve() if file_path.is_absolute() else (base_path / file_path).resolve()
263+
try:
264+
resolved.relative_to(base_path)
265+
except ValueError:
266+
raise ToolError(f"Path escapes base directory: {{file_path}}") from None
267+
return resolved
268+
269+
270+
class ScopedEditTool(EditTool):
271+
"""EditTool variant constrained to the task workdir."""
272+
273+
def __init__(self, base_path: str | Path) -> None:
274+
super().__init__()
275+
self._base_path = Path(base_path).resolve()
276+
277+
def validate_path(self, command: str, path: Path) -> None:
278+
resolved = _resolve_within_base(path, self._base_path)
279+
super().validate_path(command, resolved)
180280
181281
env = Environment("{env_name}")
182282
183283
# Standard coding tools - agents interact via bash (matching Harbor's model)
184-
env.add_tool(BashTool())
185-
env.add_tool(EditTool())
186-
env.add_tool(ReadTool())
187-
env.add_tool(GrepTool())
188-
env.add_tool(GlobTool())
189-
env.add_tool(ListTool())
284+
env.add_tool(BashTool(timeout=600.0))
285+
env.add_tool(ScopedEditTool(base_path=AGENT_WORKDIR))
286+
env.add_tool(ReadTool(base_path=AGENT_WORKDIR))
287+
env.add_tool(GrepTool(base_path=AGENT_WORKDIR))
288+
env.add_tool(GlobTool(base_path=AGENT_WORKDIR))
289+
env.add_tool(ListTool(base_path=AGENT_WORKDIR))
190290
191291
'''
192292

@@ -208,7 +308,7 @@ async def run_task(task_id: TaskId):
208308
_SCENARIO_BODY = '''\
209309
"""Run a Harbor task by ID.
210310
211-
Reads /harbor/tasks/<task_id>/instruction.md as the prompt.
311+
Reads the root-only task bundle's instruction.md as the prompt.
212312
After the agent works, runs tests/test.sh and parses
213313
/logs/verifier/reward.txt or reward.json for the reward.
214314
"""
@@ -228,6 +328,11 @@ async def run_task(task_id: TaskId):
228328
# Ensure log output directory exists
229329
logs_dir = Path("/logs/verifier")
230330
logs_dir.mkdir(parents=True, exist_ok=True)
331+
for reward_file in (Path("/logs/verifier/reward.txt"), Path("/logs/verifier/reward.json")):
332+
try:
333+
reward_file.unlink(missing_ok=True)
334+
except OSError as exc:
335+
LOGGER.warning("Failed to clear stale reward file %s: %s", reward_file, exc)
231336
232337
# Harbor mounts the task's tests/ directory at /tests/ — replicate that
233338
tests_link = Path("/tests")
@@ -243,7 +348,7 @@ async def run_task(task_id: TaskId):
243348
try:
244349
result = subprocess.run(
245350
["bash", str(test_script)],
246-
cwd="/app",
351+
cwd=AGENT_WORKDIR if Path(AGENT_WORKDIR).is_dir() else "/app",
247352
capture_output=True,
248353
text=True,
249354
timeout={verifier_timeout},
@@ -303,6 +408,7 @@ def _build_env_py(
303408
source_path: str,
304409
task_ids: list[str],
305410
verifier_timeout: int,
411+
agent_workdir: str,
306412
) -> str:
307413
"""Build the env.py content, adapting the scenario signature to task count."""
308414
if len(task_ids) == 1:
@@ -318,6 +424,7 @@ def _build_env_py(
318424
source_path=source_path,
319425
task_count=len(task_ids),
320426
extra_imports=extra_imports,
427+
agent_workdir=agent_workdir,
321428
)
322429
body = _SCENARIO_BODY.format(verifier_timeout=verifier_timeout)
323430
return header + scenario + body
@@ -327,6 +434,14 @@ def _build_env_py(
327434
# Shared snippet: install uv standalone (works on any base image with curl or
328435
# apt), then use uv to bootstrap Python and sync dependencies.
329436
_HUD_LAYER = """\
437+
USER root
438+
# HUD coding subprocesses run as uid/gid 1000, so let them edit the original
439+
# challenge tree while keeping scenario-only task data outside that tree.
440+
RUN agent_workdir={agent_workdir_shell} \\
441+
&& eval "agent_workdir=\\"$agent_workdir\\"" \\
442+
&& mkdir -p /workspace /app \\
443+
&& if [ -d "$agent_workdir" ]; then chmod -R a+rwX "$agent_workdir"; fi
444+
330445
# ============================================================
331446
# HUD MCP server layer
332447
# ============================================================
@@ -341,19 +456,22 @@ def _build_env_py(
341456
ENV PATH="/root/.local/bin:$PATH"
342457
343458
COPY pyproject.toml uv.lock* ./
344-
RUN uv sync --frozen --no-dev --no-install-project 2>/dev/null || \\
345-
uv sync --no-dev --no-install-project
459+
RUN uv sync --frozen --no-dev --no-install-project --python 3.12 2>/dev/null || \\
460+
uv sync --no-dev --no-install-project --python 3.12
461+
ENV PATH="/hud/.venv/bin:$PATH"
346462
347-
# Harbor task data (instructions + test scripts baked into image)
348-
COPY tasks/ /harbor/tasks/
463+
# The scenario reads task data directly from a root-only bundle. The agent only
464+
# receives the yielded prompt and task workdir files.
465+
COPY tasks/ /root/.hud_harbor/tasks/
466+
RUN chown -R root:root /root/.hud_harbor && chmod -R go-rwx /root/.hud_harbor
349467
350468
# Ensure standard directories exist and are writable at runtime
351469
# (MCP server may run as non-root; Harbor tasks expect /app writable)
352470
RUN mkdir -p /logs/verifier /workspace /app && chmod 777 /logs/verifier /workspace /app
353471
354472
COPY env.py ./
355473
356-
CMD ["uv", "run", "--no-project", "python", "-m", "hud", "dev", "env:env", "--stdio"]
474+
CMD ["hud", "dev", "env:env", "--stdio"]
357475
"""
358476

359477
DOCKERFILE_WITH_BASE_TEMPLATE = (
@@ -457,6 +575,7 @@ def convert(self, path: Path) -> ConvertResult:
457575
# Generate environments and taskset
458576
environments: list[GeneratedEnvironment] = []
459577
taskset: list[dict[str, Any]] = []
578+
used_slugs: set[str] = set()
460579
base_name = f"hud-harbor-{_normalize_name(dataset_name)}"
461580

462581
# Sort groups by size (largest first) for consistent naming
@@ -470,6 +589,13 @@ def convert(self, path: Path) -> ConvertResult:
470589
rep_task = group_tasks[0]
471590
env_dir = rep_task.directory / "environment"
472591
dockerfile_content = _find_dockerfile(env_dir) if env_dir.exists() else None
592+
agent_workdir = _extract_workdir(dockerfile_content or "")
593+
env_cfg = rep_task.config.get("environment", {})
594+
if isinstance(env_cfg, dict):
595+
configured_workdir = env_cfg.get("workdir")
596+
if isinstance(configured_workdir, str) and configured_workdir:
597+
agent_workdir = configured_workdir
598+
agent_workdir_shell = shlex.quote(agent_workdir)
473599

474600
# Extract verifier timeout from config
475601
verifier_timeout = 600
@@ -487,6 +613,7 @@ def convert(self, path: Path) -> ConvertResult:
487613
source_path=path.as_posix(),
488614
task_ids=task_ids,
489615
verifier_timeout=verifier_timeout,
616+
agent_workdir=agent_workdir,
490617
)
491618

492619
# --- Generate Dockerfile.hud ---
@@ -495,9 +622,12 @@ def convert(self, path: Path) -> ConvertResult:
495622
dockerfile = DOCKERFILE_WITH_BASE_TEMPLATE.format(
496623
source=env_dir.as_posix(),
497624
base_dockerfile=adapted,
625+
agent_workdir_shell=agent_workdir_shell,
498626
)
499627
else:
500-
dockerfile = DOCKERFILE_FALLBACK_TEMPLATE
628+
dockerfile = DOCKERFILE_FALLBACK_TEMPLATE.format(
629+
agent_workdir_shell=agent_workdir_shell,
630+
)
501631

502632
# --- Generate pyproject.toml ---
503633
pyproject = PYPROJECT_TEMPLATE.format(name=env_name)
@@ -532,10 +662,13 @@ def convert(self, path: Path) -> ConvertResult:
532662

533663
taskset.append(
534664
{
665+
"slug": _make_task_slug(task.task_id, used_slugs),
535666
"env": {"name": env_name},
536667
"scenario": f"{env_name}:run-task",
537668
"args": {"task_id": task.task_id},
538669
"metadata": metadata,
670+
"agent_config": {"append_setup_output": False},
671+
"validation": None,
539672
}
540673
)
541674

0 commit comments

Comments
 (0)