Skip to content

Commit 11d8032

Browse files
nancyjlau and ryantzr1 committed
Update Harbor converter runtime isolation
Co-authored-by: Ryan Tan <63581031+ryantzr1@users.noreply.github.com>
1 parent a8fb83b commit 11d8032

3 files changed

Lines changed: 276 additions & 20 deletions

File tree

hud/cli/convert/__init__.py

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313

1414
import json
1515
import logging
16+
import re
1617
import shutil
1718
from pathlib import Path
1819

@@ -36,6 +37,17 @@
3637

3738
# Shell script extensions that need CRLF -> LF normalization
3839
_SHELL_EXTENSIONS = frozenset({".sh", ".bash", ".zsh", ".ksh"})
40+
_SOURCE_CONTEXT_MARKDOWN_RE = re.compile(r"^[a-z]{2,8}-\d+(?:-\d+)*\.md$", re.IGNORECASE)
41+
42+
43+
def _should_skip_task_data_item(path: Path) -> bool:
44+
"""Return True for source task files that should not be exposed to agents."""
45+
lower_name = path.name.lower()
46+
return (
47+
path.name in ("environment", "solution")
48+
or lower_name in (".dockerignore", "scoring.md")
49+
or bool(_SOURCE_CONTEXT_MARKDOWN_RE.match(path.name))
50+
)
3951

4052

4153
def _normalize_line_endings(directory: Path) -> None:
@@ -154,8 +166,8 @@ def write_result(result: ConvertResult, output_dir: Path) -> Path:
154166
dest.mkdir(parents=True, exist_ok=True)
155167

156168
for item in source_dir.iterdir():
157-
# Skip dirs that are handled by the Dockerfile or ignored
158-
if item.name in ("environment", "solution"):
169+
# Skip dirs handled elsewhere and files that leak scoring/source context.
170+
if _should_skip_task_data_item(item):
159171
continue
160172
if item.is_dir():
161173
shutil.copytree(item, dest / item.name)

hud/cli/convert/harbor.py

Lines changed: 149 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
import hashlib
3434
import logging
3535
import re
36+
import shlex
3637
import tomllib
3738
from dataclasses import dataclass
3839
from pathlib import Path # noqa: TC003 - used at runtime
@@ -76,6 +77,56 @@ def _normalize_name(name: str) -> str:
7677
return normalized.strip("-") or "converted"
7778

7879

80+
def _docker_instruction_name(line: str) -> str | None:
81+
"""Return the Dockerfile instruction name for *line*, if it has one."""
82+
stripped = line.strip()
83+
if not stripped or stripped.startswith("#"):
84+
return None
85+
return stripped.split(maxsplit=1)[0].upper()
86+
87+
88+
def _docker_instruction_value(line: str) -> str:
89+
"""Return the remainder of a Dockerfile instruction line."""
90+
parts = line.strip().split(maxsplit=1)
91+
return parts[1] if len(parts) > 1 else ""
92+
93+
94+
def _extract_workdir(content: str) -> str:
95+
"""Return the last Dockerfile WORKDIR, defaulting to /app."""
96+
workdir = "/app"
97+
for line in content.splitlines():
98+
if _docker_instruction_name(line) != "WORKDIR":
99+
continue
100+
value = _docker_instruction_value(line)
101+
if value:
102+
workdir = value
103+
return workdir
104+
105+
106+
def _make_task_slug(task_id: str, used_slugs: set[str]) -> str:
    """Create a stable, unique HUD task slug from a Harbor task id.

    The slug starts from the normalized task id. When that normalized form is
    longer than 100 characters it is truncated and tagged with the first eight
    hex digits of the id's SHA-256, keeping the result within 100 characters.
    If the candidate is already in *used_slugs*, digest-based suffixes (then
    ``<digest>-2``, ``<digest>-3``, ...) are tried until a free one is found.

    Args:
        task_id: Harbor task identifier.
        used_slugs: Slugs already handed out; the returned slug is added to it.

    Returns:
        A slug that was not in *used_slugs* before the call.
    """
    base = _normalize_name(task_id)
    digest = hashlib.sha256(task_id.encode()).hexdigest()[:8]
    too_long = len(base) > 100

    def with_suffix(suffix: str) -> str:
        # Leave room for the joining dash so prefix + "-" + suffix <= 100 chars.
        head = base[: 99 - len(suffix)].rstrip("-")
        return f"{head}-{suffix}"

    candidate = with_suffix(digest) if too_long else base

    if candidate in used_slugs:
        # A truncated slug already carries the digest, so disambiguate with "-1".
        first_retry = f"{digest}-1" if too_long else digest
        candidate = with_suffix(first_retry)

    attempt = 2
    while candidate in used_slugs:
        candidate = with_suffix(f"{digest}-{attempt}")
        attempt += 1

    used_slugs.add(candidate)
    return candidate
128+
129+
79130
def _find_dockerfile(env_dir: Path) -> str | None:
80131
"""Read the Dockerfile from a Harbor environment directory."""
81132
for name in ("Dockerfile", "dockerfile"):
@@ -92,8 +143,20 @@ def _adapt_harbor_dockerfile(content: str) -> str:
92143
"""
93144
lines = content.splitlines()
94145
adapted: list[str] = []
146+
in_healthcheck_continuation = False
95147
for line in lines:
96148
stripped = line.strip().upper()
149+
150+
if stripped.startswith("HEALTHCHECK "):
151+
adapted.append(line)
152+
in_healthcheck_continuation = line.rstrip().endswith("\\")
153+
continue
154+
155+
if in_healthcheck_continuation:
156+
adapted.append(line)
157+
in_healthcheck_continuation = line.rstrip().endswith("\\")
158+
continue
159+
97160
if stripped.startswith(("CMD ", "CMD[", "ENTRYPOINT ", "ENTRYPOINT[")):
98161
adapted.append(f"# [harbor original] {line}")
99162
else:
@@ -167,26 +230,64 @@ def _parse_task(task_dir: Path) -> HarborTask | None:
167230
168231
import json
169232
import logging
233+
import os
170234
import subprocess
171235
from pathlib import Path
172236
{extra_imports}
173237
from hud import Environment
174238
from hud.tools import BashTool, EditTool
175239
from hud.tools.filesystem import GlobTool, GrepTool, ListTool, ReadTool
240+
from hud.tools.types import ToolError
176241
177242
LOGGER = logging.getLogger(__name__)
178243
179-
TASKS_DIR = Path("/harbor/tasks")
244+
TASKS_DIR = Path("/root/.hud_harbor/tasks")
245+
AGENT_WORKDIR = os.path.expandvars({agent_workdir!r})
246+
247+
248+
def _set_agent_workdir() -> None:
249+
"""Put agent shell sessions in the original Harbor challenge workdir."""
250+
try:
251+
os.chdir(AGENT_WORKDIR)
252+
except FileNotFoundError:
253+
if TASKS_DIR.exists():
254+
LOGGER.warning("Agent workdir does not exist: %s", AGENT_WORKDIR)
255+
else:
256+
LOGGER.debug("Skipping container workdir on host import: %s", AGENT_WORKDIR)
257+
258+
259+
_set_agent_workdir()
260+
261+
262+
def _resolve_within_base(file_path: Path, base_path: Path) -> Path:
263+
resolved = file_path.resolve() if file_path.is_absolute() else (base_path / file_path).resolve()
264+
try:
265+
resolved.relative_to(base_path)
266+
except ValueError:
267+
raise ToolError(f"Path escapes base directory: {{file_path}}") from None
268+
return resolved
269+
270+
271+
class ScopedEditTool(EditTool):
272+
"""EditTool variant constrained to the task workdir."""
273+
274+
def __init__(self, base_path: str | Path) -> None:
275+
super().__init__()
276+
self._base_path = Path(base_path).resolve()
277+
278+
def validate_path(self, command: str, path: Path) -> None:
279+
resolved = _resolve_within_base(path, self._base_path)
280+
super().validate_path(command, resolved)
180281
181282
env = Environment("{env_name}")
182283
183284
# Standard coding tools - agents interact via bash (matching Harbor's model)
184-
env.add_tool(BashTool())
185-
env.add_tool(EditTool())
186-
env.add_tool(ReadTool())
187-
env.add_tool(GrepTool())
188-
env.add_tool(GlobTool())
189-
env.add_tool(ListTool())
285+
env.add_tool(BashTool(timeout=600.0))
286+
env.add_tool(ScopedEditTool(base_path=AGENT_WORKDIR))
287+
env.add_tool(ReadTool(base_path=AGENT_WORKDIR))
288+
env.add_tool(GrepTool(base_path=AGENT_WORKDIR))
289+
env.add_tool(GlobTool(base_path=AGENT_WORKDIR))
290+
env.add_tool(ListTool(base_path=AGENT_WORKDIR))
190291
191292
'''
192293

@@ -208,7 +309,7 @@ async def run_task(task_id: TaskId):
208309
_SCENARIO_BODY = '''\
209310
"""Run a Harbor task by ID.
210311
211-
Reads /harbor/tasks/<task_id>/instruction.md as the prompt.
312+
Reads the root-only task bundle's instruction.md as the prompt.
212313
After the agent works, runs tests/test.sh and parses
213314
/logs/verifier/reward.txt or reward.json for the reward.
214315
"""
@@ -228,6 +329,11 @@ async def run_task(task_id: TaskId):
228329
# Ensure log output directory exists
229330
logs_dir = Path("/logs/verifier")
230331
logs_dir.mkdir(parents=True, exist_ok=True)
332+
for reward_file in (Path("/logs/verifier/reward.txt"), Path("/logs/verifier/reward.json")):
333+
try:
334+
reward_file.unlink(missing_ok=True)
335+
except OSError as exc:
336+
LOGGER.warning("Failed to clear stale reward file %s: %s", reward_file, exc)
231337
232338
# Harbor mounts the task's tests/ directory at /tests/ — replicate that
233339
tests_link = Path("/tests")
@@ -243,7 +349,7 @@ async def run_task(task_id: TaskId):
243349
try:
244350
result = subprocess.run(
245351
["bash", str(test_script)],
246-
cwd="/app",
352+
cwd=AGENT_WORKDIR if Path(AGENT_WORKDIR).is_dir() else "/app",
247353
capture_output=True,
248354
text=True,
249355
timeout={verifier_timeout},
@@ -303,6 +409,7 @@ def _build_env_py(
303409
source_path: str,
304410
task_ids: list[str],
305411
verifier_timeout: int,
412+
agent_workdir: str,
306413
) -> str:
307414
"""Build the env.py content, adapting the scenario signature to task count."""
308415
if len(task_ids) == 1:
@@ -318,6 +425,7 @@ def _build_env_py(
318425
source_path=source_path,
319426
task_count=len(task_ids),
320427
extra_imports=extra_imports,
428+
agent_workdir=agent_workdir,
321429
)
322430
body = _SCENARIO_BODY.format(verifier_timeout=verifier_timeout)
323431
return header + scenario + body
@@ -327,6 +435,14 @@ def _build_env_py(
327435
# Shared snippet: install uv standalone (works on any base image with curl or
328436
# apt), then use uv to bootstrap Python and sync dependencies.
329437
_HUD_LAYER = """\
438+
USER root
439+
# HUD coding subprocesses run as uid/gid 1000, so let them edit the original
440+
# challenge tree while keeping scenario-only task data outside that tree.
441+
RUN agent_workdir={agent_workdir_shell} \\
442+
&& eval "agent_workdir=\\"$agent_workdir\\"" \\
443+
&& mkdir -p /workspace /app \\
444+
&& if [ -d "$agent_workdir" ]; then chmod -R a+rwX "$agent_workdir"; fi
445+
330446
# ============================================================
331447
# HUD MCP server layer
332448
# ============================================================
@@ -341,19 +457,22 @@ def _build_env_py(
341457
ENV PATH="/root/.local/bin:$PATH"
342458
343459
COPY pyproject.toml uv.lock* ./
344-
RUN uv sync --frozen --no-dev --no-install-project 2>/dev/null || \\
345-
uv sync --no-dev --no-install-project
460+
RUN uv sync --frozen --no-dev --no-install-project --python 3.12 2>/dev/null || \\
461+
uv sync --no-dev --no-install-project --python 3.12
462+
ENV PATH="/hud/.venv/bin:$PATH"
346463
347-
# Harbor task data (instructions + test scripts baked into image)
348-
COPY tasks/ /harbor/tasks/
464+
# The scenario reads task data directly from a root-only bundle. The agent only
465+
# receives the yielded prompt and task workdir files.
466+
COPY tasks/ /root/.hud_harbor/tasks/
467+
RUN chown -R root:root /root/.hud_harbor && chmod -R go-rwx /root/.hud_harbor
349468
350469
# Ensure standard directories exist and are writable at runtime
351470
# (MCP server may run as non-root; Harbor tasks expect /app writable)
352471
RUN mkdir -p /logs/verifier /workspace /app && chmod 777 /logs/verifier /workspace /app
353472
354473
COPY env.py ./
355474
356-
CMD ["uv", "run", "--no-project", "python", "-m", "hud", "dev", "env:env", "--stdio"]
475+
CMD ["hud", "dev", "env:env", "--stdio"]
357476
"""
358477

359478
DOCKERFILE_WITH_BASE_TEMPLATE = (
@@ -457,6 +576,7 @@ def convert(self, path: Path) -> ConvertResult:
457576
# Generate environments and taskset
458577
environments: list[GeneratedEnvironment] = []
459578
taskset: list[dict[str, Any]] = []
579+
used_slugs: set[str] = set()
460580
base_name = f"hud-harbor-{_normalize_name(dataset_name)}"
461581

462582
# Sort groups by size (largest first) for consistent naming
@@ -470,6 +590,13 @@ def convert(self, path: Path) -> ConvertResult:
470590
rep_task = group_tasks[0]
471591
env_dir = rep_task.directory / "environment"
472592
dockerfile_content = _find_dockerfile(env_dir) if env_dir.exists() else None
593+
agent_workdir = _extract_workdir(dockerfile_content or "")
594+
env_cfg = rep_task.config.get("environment", {})
595+
if isinstance(env_cfg, dict):
596+
configured_workdir = env_cfg.get("workdir")
597+
if isinstance(configured_workdir, str) and configured_workdir:
598+
agent_workdir = configured_workdir
599+
agent_workdir_shell = shlex.quote(agent_workdir)
473600

474601
# Extract verifier timeout from config
475602
verifier_timeout = 600
@@ -487,6 +614,7 @@ def convert(self, path: Path) -> ConvertResult:
487614
source_path=path.as_posix(),
488615
task_ids=task_ids,
489616
verifier_timeout=verifier_timeout,
617+
agent_workdir=agent_workdir,
490618
)
491619

492620
# --- Generate Dockerfile.hud ---
@@ -495,9 +623,12 @@ def convert(self, path: Path) -> ConvertResult:
495623
dockerfile = DOCKERFILE_WITH_BASE_TEMPLATE.format(
496624
source=env_dir.as_posix(),
497625
base_dockerfile=adapted,
626+
agent_workdir_shell=agent_workdir_shell,
498627
)
499628
else:
500-
dockerfile = DOCKERFILE_FALLBACK_TEMPLATE
629+
dockerfile = DOCKERFILE_FALLBACK_TEMPLATE.format(
630+
agent_workdir_shell=agent_workdir_shell,
631+
)
501632

502633
# --- Generate pyproject.toml ---
503634
pyproject = PYPROJECT_TEMPLATE.format(name=env_name)
@@ -532,10 +663,13 @@ def convert(self, path: Path) -> ConvertResult:
532663

533664
taskset.append(
534665
{
666+
"slug": _make_task_slug(task.task_id, used_slugs),
535667
"env": {"name": env_name},
536668
"scenario": f"{env_name}:run-task",
537669
"args": {"task_id": task.task_id},
538670
"metadata": metadata,
671+
"agent_config": {"append_setup_output": False},
672+
"validation": None,
539673
}
540674
)
541675

0 commit comments

Comments
 (0)