3333import hashlib
3434import logging
3535import re
36+ import shlex
3637import tomllib
3738from dataclasses import dataclass
3839from pathlib import Path # noqa: TC003 - used at runtime
@@ -76,6 +77,56 @@ def _normalize_name(name: str) -> str:
7677 return normalized .strip ("-" ) or "converted"
7778
7879
80+ def _docker_instruction_name (line : str ) -> str | None :
81+ """Return the Dockerfile instruction name for *line*, if it has one."""
82+ stripped = line .strip ()
83+ if not stripped or stripped .startswith ("#" ):
84+ return None
85+ return stripped .split (maxsplit = 1 )[0 ].upper ()
86+
87+
88+ def _docker_instruction_value (line : str ) -> str :
89+ """Return the remainder of a Dockerfile instruction line."""
90+ parts = line .strip ().split (maxsplit = 1 )
91+ return parts [1 ] if len (parts ) > 1 else ""
92+
93+
def _extract_workdir(content: str) -> str:
    """Return the effective final Dockerfile WORKDIR, defaulting to /app.

    Every ``WORKDIR`` instruction in *content* is applied in order. Docker
    resolves a relative ``WORKDIR`` against the previous one, so plain
    relative values are joined onto the directory accumulated so far (or
    ``/`` when no earlier ``WORKDIR`` was seen) instead of being returned
    verbatim. Values that start with ``/``, with ``$`` (an environment
    variable reference) or with a quote character are kept exactly as
    written.

    NOTE(review): multi-stage builds are not modelled; a ``FROM`` line does
    not reset the accumulated directory.

    Args:
        content: Full text of a Dockerfile; may be empty.

    Returns:
        The resolved working directory, or ``"/app"`` when *content* has no
        ``WORKDIR`` instruction with a value.
    """
    import posixpath  # local import: only this helper needs POSIX path joining

    workdir: str | None = None
    for line in content.splitlines():
        if _docker_instruction_name(line) != "WORKDIR":
            continue
        value = _docker_instruction_value(line)
        if not value:
            continue
        # Only plain relative paths are rewritten; absolute paths, variable
        # references and quoted values cannot be joined safely here.
        if not value.startswith(("/", "$", '"', "'")):
            value = posixpath.normpath(posixpath.join(workdir or "/", value))
        workdir = value
    return workdir or "/app"
104+
105+
def _make_task_slug(task_id: str, used_slugs: set[str]) -> str:
    """Derive a unique HUD task slug for a Harbor task id and reserve it.

    The slug is the normalized task id. When that exceeds 100 characters it
    is truncated and tagged with the first 8 hex digits of the id's SHA-256
    so the result is at most 100 characters. On collision with *used_slugs*
    the digest (and then a numeric counter) is appended until a free slug is
    found. The chosen slug is added to *used_slugs* before returning.
    """
    normalized = _normalize_name(task_id)
    fingerprint = hashlib.sha256(task_id.encode()).hexdigest()[:8]
    too_long = len(normalized) > 100

    def tagged(tag: str) -> str:
        # Reserve room for the tag plus its joining dash within 100 chars.
        head = normalized[: 99 - len(tag)].rstrip("-")
        return f"{head}-{tag}"

    def candidates():
        # Candidates are produced lazily, in the order they should be tried.
        if too_long:
            yield tagged(fingerprint)
            yield tagged(f"{fingerprint}-1")
        else:
            yield normalized
            yield tagged(fingerprint)
        attempt = 2
        while True:
            yield tagged(f"{fingerprint}-{attempt}")
            attempt += 1

    chosen = next(option for option in candidates() if option not in used_slugs)
    used_slugs.add(chosen)
    return chosen
128+
129+
79130def _find_dockerfile (env_dir : Path ) -> str | None :
80131 """Read the Dockerfile from a Harbor environment directory."""
81132 for name in ("Dockerfile" , "dockerfile" ):
@@ -92,8 +143,20 @@ def _adapt_harbor_dockerfile(content: str) -> str:
92143 """
93144 lines = content .splitlines ()
94145 adapted : list [str ] = []
146+ in_healthcheck_continuation = False
95147 for line in lines :
96148 stripped = line .strip ().upper ()
149+
150+ if stripped .startswith ("HEALTHCHECK " ):
151+ adapted .append (line )
152+ in_healthcheck_continuation = line .rstrip ().endswith ("\\ " )
153+ continue
154+
155+ if in_healthcheck_continuation :
156+ adapted .append (line )
157+ in_healthcheck_continuation = line .rstrip ().endswith ("\\ " )
158+ continue
159+
97160 if stripped .startswith (("CMD " , "CMD[" , "ENTRYPOINT " , "ENTRYPOINT[" )):
98161 adapted .append (f"# [harbor original] { line } " )
99162 else :
@@ -167,26 +230,64 @@ def _parse_task(task_dir: Path) -> HarborTask | None:
167230
168231import json
169232import logging
233+ import os
170234import subprocess
171235from pathlib import Path
172236{extra_imports}
173237from hud import Environment
174238from hud.tools import BashTool, EditTool
175239from hud.tools.filesystem import GlobTool, GrepTool, ListTool, ReadTool
240+ from hud.tools.types import ToolError
176241
177242LOGGER = logging.getLogger(__name__)
178243
179- TASKS_DIR = Path("/harbor/tasks")
244+ TASKS_DIR = Path("/root/.hud_harbor/tasks")
245+ AGENT_WORKDIR = os.path.expandvars({agent_workdir!r})
246+
247+
248+ def _set_agent_workdir() -> None:
249+ """Put agent shell sessions in the original Harbor challenge workdir."""
250+ try:
251+ os.chdir(AGENT_WORKDIR)
252+ except FileNotFoundError:
253+ if TASKS_DIR.exists():
254+ LOGGER.warning("Agent workdir does not exist: %s", AGENT_WORKDIR)
255+ else:
256+ LOGGER.debug("Skipping container workdir on host import: %s", AGENT_WORKDIR)
257+
258+
259+ _set_agent_workdir()
260+
261+
262+ def _resolve_within_base(file_path: Path, base_path: Path) -> Path:
263+ resolved = file_path.resolve() if file_path.is_absolute() else (base_path / file_path).resolve()
264+ try:
265+ resolved.relative_to(base_path)
266+ except ValueError:
267+ raise ToolError(f"Path escapes base directory: {{file_path}}") from None
268+ return resolved
269+
270+
271+ class ScopedEditTool(EditTool):
272+ """EditTool variant constrained to the task workdir."""
273+
274+ def __init__(self, base_path: str | Path) -> None:
275+ super().__init__()
276+ self._base_path = Path(base_path).resolve()
277+
278+ def validate_path(self, command: str, path: Path) -> None:
279+ resolved = _resolve_within_base(path, self._base_path)
280+ super().validate_path(command, resolved)
180281
181282env = Environment("{env_name}")
182283
183284# Standard coding tools - agents interact via bash (matching Harbor's model)
184- env.add_tool(BashTool())
185- env.add_tool(EditTool( ))
186- env.add_tool(ReadTool())
187- env.add_tool(GrepTool())
188- env.add_tool(GlobTool())
189- env.add_tool(ListTool())
285+ env.add_tool(BashTool(timeout=600.0 ))
286+ env.add_tool(ScopedEditTool(base_path=AGENT_WORKDIR ))
287+ env.add_tool(ReadTool(base_path=AGENT_WORKDIR ))
288+ env.add_tool(GrepTool(base_path=AGENT_WORKDIR ))
289+ env.add_tool(GlobTool(base_path=AGENT_WORKDIR ))
290+ env.add_tool(ListTool(base_path=AGENT_WORKDIR ))
190291
191292'''
192293
@@ -208,7 +309,7 @@ async def run_task(task_id: TaskId):
208309_SCENARIO_BODY = '''\
209310 """Run a Harbor task by ID.
210311
211- Reads /harbor/tasks/<task_id>/ instruction.md as the prompt.
312+ Reads the root-only task bundle's instruction.md as the prompt.
212313 After the agent works, runs tests/test.sh and parses
213314 /logs/verifier/reward.txt or reward.json for the reward.
214315 """
@@ -228,6 +329,11 @@ async def run_task(task_id: TaskId):
228329 # Ensure log output directory exists
229330 logs_dir = Path("/logs/verifier")
230331 logs_dir.mkdir(parents=True, exist_ok=True)
332+ for reward_file in (Path("/logs/verifier/reward.txt"), Path("/logs/verifier/reward.json")):
333+ try:
334+ reward_file.unlink(missing_ok=True)
335+ except OSError as exc:
336+ LOGGER.warning("Failed to clear stale reward file %s: %s", reward_file, exc)
231337
232338 # Harbor mounts the task's tests/ directory at /tests/ — replicate that
233339 tests_link = Path("/tests")
@@ -243,7 +349,7 @@ async def run_task(task_id: TaskId):
243349 try:
244350 result = subprocess.run(
245351 ["bash", str(test_script)],
246- cwd="/app",
352+ cwd=AGENT_WORKDIR if Path(AGENT_WORKDIR).is_dir() else "/app",
247353 capture_output=True,
248354 text=True,
249355 timeout={verifier_timeout},
@@ -303,6 +409,7 @@ def _build_env_py(
303409 source_path : str ,
304410 task_ids : list [str ],
305411 verifier_timeout : int ,
412+ agent_workdir : str ,
306413) -> str :
307414 """Build the env.py content, adapting the scenario signature to task count."""
308415 if len (task_ids ) == 1 :
@@ -318,6 +425,7 @@ def _build_env_py(
318425 source_path = source_path ,
319426 task_count = len (task_ids ),
320427 extra_imports = extra_imports ,
428+ agent_workdir = agent_workdir ,
321429 )
322430 body = _SCENARIO_BODY .format (verifier_timeout = verifier_timeout )
323431 return header + scenario + body
@@ -327,6 +435,14 @@ def _build_env_py(
327435# Shared snippet: install uv standalone (works on any base image with curl or
328436# apt), then use uv to bootstrap Python and sync dependencies.
329437_HUD_LAYER = """\
438+ USER root
439+ # HUD coding subprocesses run as uid/gid 1000, so let them edit the original
440+ # challenge tree while keeping scenario-only task data outside that tree.
441+ RUN agent_workdir={agent_workdir_shell} \\
442+ && eval "agent_workdir=\\ "$agent_workdir\\ "" \\
443+ && mkdir -p /workspace /app \\
444+ && if [ -d "$agent_workdir" ]; then chmod -R a+rwX "$agent_workdir"; fi
445+
330446# ============================================================
331447# HUD MCP server layer
332448# ============================================================
@@ -341,19 +457,22 @@ def _build_env_py(
341457ENV PATH="/root/.local/bin:$PATH"
342458
343459COPY pyproject.toml uv.lock* ./
344- RUN uv sync --frozen --no-dev --no-install-project 2>/dev/null || \\
345- uv sync --no-dev --no-install-project
460+ RUN uv sync --frozen --no-dev --no-install-project --python 3.12 2>/dev/null || \\
461+ uv sync --no-dev --no-install-project --python 3.12
462+ ENV PATH="/hud/.venv/bin:$PATH"
346463
347- # Harbor task data (instructions + test scripts baked into image)
348- COPY tasks/ /harbor/tasks/
464+ # The scenario reads task data directly from a root-only bundle. The agent only
465+ # receives the yielded prompt and task workdir files.
466+ COPY tasks/ /root/.hud_harbor/tasks/
467+ RUN chown -R root:root /root/.hud_harbor && chmod -R go-rwx /root/.hud_harbor
349468
350469# Ensure standard directories exist and are writable at runtime
351470# (MCP server may run as non-root; Harbor tasks expect /app writable)
352471RUN mkdir -p /logs/verifier /workspace /app && chmod 777 /logs/verifier /workspace /app
353472
354473COPY env.py ./
355474
356- CMD ["uv", "run", "--no-project", "python", "-m", " hud", "dev", "env:env", "--stdio"]
475+ CMD ["hud", "dev", "env:env", "--stdio"]
357476"""
358477
359478DOCKERFILE_WITH_BASE_TEMPLATE = (
@@ -457,6 +576,7 @@ def convert(self, path: Path) -> ConvertResult:
457576 # Generate environments and taskset
458577 environments : list [GeneratedEnvironment ] = []
459578 taskset : list [dict [str , Any ]] = []
579+ used_slugs : set [str ] = set ()
460580 base_name = f"hud-harbor-{ _normalize_name (dataset_name )} "
461581
462582 # Sort groups by size (largest first) for consistent naming
@@ -470,6 +590,13 @@ def convert(self, path: Path) -> ConvertResult:
470590 rep_task = group_tasks [0 ]
471591 env_dir = rep_task .directory / "environment"
472592 dockerfile_content = _find_dockerfile (env_dir ) if env_dir .exists () else None
593+ agent_workdir = _extract_workdir (dockerfile_content or "" )
594+ env_cfg = rep_task .config .get ("environment" , {})
595+ if isinstance (env_cfg , dict ):
596+ configured_workdir = env_cfg .get ("workdir" )
597+ if isinstance (configured_workdir , str ) and configured_workdir :
598+ agent_workdir = configured_workdir
599+ agent_workdir_shell = shlex .quote (agent_workdir )
473600
474601 # Extract verifier timeout from config
475602 verifier_timeout = 600
@@ -487,6 +614,7 @@ def convert(self, path: Path) -> ConvertResult:
487614 source_path = path .as_posix (),
488615 task_ids = task_ids ,
489616 verifier_timeout = verifier_timeout ,
617+ agent_workdir = agent_workdir ,
490618 )
491619
492620 # --- Generate Dockerfile.hud ---
@@ -495,9 +623,12 @@ def convert(self, path: Path) -> ConvertResult:
495623 dockerfile = DOCKERFILE_WITH_BASE_TEMPLATE .format (
496624 source = env_dir .as_posix (),
497625 base_dockerfile = adapted ,
626+ agent_workdir_shell = agent_workdir_shell ,
498627 )
499628 else :
500- dockerfile = DOCKERFILE_FALLBACK_TEMPLATE
629+ dockerfile = DOCKERFILE_FALLBACK_TEMPLATE .format (
630+ agent_workdir_shell = agent_workdir_shell ,
631+ )
501632
502633 # --- Generate pyproject.toml ---
503634 pyproject = PYPROJECT_TEMPLATE .format (name = env_name )
@@ -532,10 +663,13 @@ def convert(self, path: Path) -> ConvertResult:
532663
533664 taskset .append (
534665 {
666+ "slug" : _make_task_slug (task .task_id , used_slugs ),
535667 "env" : {"name" : env_name },
536668 "scenario" : f"{ env_name } :run-task" ,
537669 "args" : {"task_id" : task .task_id },
538670 "metadata" : metadata ,
671+ "agent_config" : {"append_setup_output" : False },
672+ "validation" : None ,
539673 }
540674 )
541675
0 commit comments