3333import hashlib
3434import logging
3535import re
36+ import shlex
3637import tomllib
3738from dataclasses import dataclass
3839from pathlib import Path # noqa: TC003 - used at runtime
@@ -76,6 +77,55 @@ def _normalize_name(name: str) -> str:
7677 return normalized .strip ("-" ) or "converted"
7778
7879
80+ def _docker_instruction_name (line : str ) -> str | None :
81+ """Return the Dockerfile instruction name for *line*, if it has one."""
82+ stripped = line .strip ()
83+ if not stripped or stripped .startswith ("#" ):
84+ return None
85+ return stripped .split (maxsplit = 1 )[0 ].upper ()
86+
87+
88+ def _docker_instruction_value (line : str ) -> str :
89+ """Return the remainder of a Dockerfile instruction line."""
90+ parts = line .strip ().split (maxsplit = 1 )
91+ return parts [1 ] if len (parts ) > 1 else ""
92+
93+
def _extract_workdir(content: str) -> str:
    """Return the effective Dockerfile WORKDIR, defaulting to /app.

    Every ``WORKDIR`` instruction in *content* is applied in order. Docker
    resolves a relative ``WORKDIR`` against the previous one, so
    ``WORKDIR /a`` followed by ``WORKDIR b`` yields ``/a/b`` rather than
    the bare ``b``. A relative path with no preceding ``WORKDIR`` is
    resolved against ``/``.

    Values that start with ``/``, ``$`` or a quote character are kept
    verbatim: absolute paths need no resolution, and variable references
    or quoted strings cannot be resolved safely here.

    Args:
        content: Full text of a Dockerfile (may be empty).

    Returns:
        The last effective working directory, or ``"/app"`` when the
        Dockerfile sets none.
    """
    workdir: str | None = None
    for line in content.splitlines():
        if _docker_instruction_name(line) != "WORKDIR":
            continue
        value = _docker_instruction_value(line)
        if not value:
            continue
        if value.startswith(("/", "$", '"', "'")):
            # Absolute path, variable reference, or quoted value: keep as written.
            workdir = value
        else:
            # Relative path: Docker joins it onto the previous WORKDIR.
            # NOTE(review): stage boundaries (FROM) are not tracked here.
            workdir = f"{(workdir or '/').rstrip('/')}/{value}"
    return workdir if workdir is not None else "/app"
104+
105+
def _make_task_slug(task_id: str, used_slugs: set[str]) -> str:
    """Create a stable, unique HUD task slug from a Harbor task id.

    The slug is the normalized task id, capped at 100 characters. Over-long
    names are truncated and tagged with an 8-character SHA-256 digest of
    the original id. If the slug is already taken, the digest is appended,
    and then ``<digest>-2``, ``<digest>-3``, ... until an unused slug is
    found.

    Args:
        task_id: The Harbor task identifier.
        used_slugs: Slugs already handed out. The chosen slug is added to
            this set before returning (the set is mutated in place).

    Returns:
        A slug of at most 100 characters that was not in *used_slugs*.
    """
    max_len = 100
    digest = hashlib.sha256(task_id.encode()).hexdigest()[:8]

    def fit(stem: str, suffix: str) -> str:
        # Leave room for "-<suffix>" so the result never exceeds max_len,
        # and avoid a doubled dash where the stem is cut.
        head = stem[: max_len - 1 - len(suffix)].rstrip("-")
        return f"{head}-{suffix}"

    base = _normalize_name(task_id)
    if len(base) > max_len:
        base = fit(base, digest)

    slug = base
    attempt = 1
    while slug in used_slugs:
        # First collision: plain digest. Later ones: digest plus a counter
        # that starts at 2.
        suffix = digest if attempt == 1 else f"{digest}-{attempt}"
        slug = fit(base, suffix)
        attempt += 1

    used_slugs.add(slug)
    return slug
127+
128+
79129def _find_dockerfile (env_dir : Path ) -> str | None :
80130 """Read the Dockerfile from a Harbor environment directory."""
81131 for name in ("Dockerfile" , "dockerfile" ):
@@ -92,8 +142,20 @@ def _adapt_harbor_dockerfile(content: str) -> str:
92142 """
93143 lines = content .splitlines ()
94144 adapted : list [str ] = []
145+ in_healthcheck_continuation = False
95146 for line in lines :
96147 stripped = line .strip ().upper ()
148+
149+ if stripped .startswith ("HEALTHCHECK " ):
150+ adapted .append (line )
151+ in_healthcheck_continuation = line .rstrip ().endswith ("\\ " )
152+ continue
153+
154+ if in_healthcheck_continuation :
155+ adapted .append (line )
156+ in_healthcheck_continuation = line .rstrip ().endswith ("\\ " )
157+ continue
158+
97159 if stripped .startswith (("CMD " , "CMD[" , "ENTRYPOINT " , "ENTRYPOINT[" )):
98160 adapted .append (f"# [harbor original] { line } " )
99161 else :
@@ -167,26 +229,64 @@ def _parse_task(task_dir: Path) -> HarborTask | None:
167229
168230import json
169231import logging
232+ import os
170233import subprocess
171234from pathlib import Path
172235{extra_imports}
173236from hud import Environment
174237from hud.tools import BashTool, EditTool
175238from hud.tools.filesystem import GlobTool, GrepTool, ListTool, ReadTool
239+ from hud.tools.types import ToolError
176240
177241LOGGER = logging.getLogger(__name__)
178242
179- TASKS_DIR = Path("/harbor/tasks")
243+ TASKS_DIR = Path("/root/.hud_harbor/tasks")
244+ AGENT_WORKDIR = os.path.expandvars({agent_workdir!r})
245+
246+
247+ def _set_agent_workdir() -> None:
248+ """Put agent shell sessions in the original Harbor challenge workdir."""
249+ try:
250+ os.chdir(AGENT_WORKDIR)
251+ except FileNotFoundError:
252+ if TASKS_DIR.exists():
253+ LOGGER.warning("Agent workdir does not exist: %s", AGENT_WORKDIR)
254+ else:
255+ LOGGER.debug("Skipping container workdir on host import: %s", AGENT_WORKDIR)
256+
257+
258+ _set_agent_workdir()
259+
260+
261+ def _resolve_within_base(file_path: Path, base_path: Path) -> Path:
262+ resolved = file_path.resolve() if file_path.is_absolute() else (base_path / file_path).resolve()
263+ try:
264+ resolved.relative_to(base_path)
265+ except ValueError:
266+ raise ToolError(f"Path escapes base directory: {{file_path}}") from None
267+ return resolved
268+
269+
270+ class ScopedEditTool(EditTool):
271+ """EditTool variant constrained to the task workdir."""
272+
273+ def __init__(self, base_path: str | Path) -> None:
274+ super().__init__()
275+ self._base_path = Path(base_path).resolve()
276+
277+ def validate_path(self, command: str, path: Path) -> None:
278+ resolved = _resolve_within_base(path, self._base_path)
279+ super().validate_path(command, resolved)
180280
181281env = Environment("{env_name}")
182282
183283# Standard coding tools - agents interact via bash (matching Harbor's model)
184- env.add_tool(BashTool())
185- env.add_tool(EditTool( ))
186- env.add_tool(ReadTool())
187- env.add_tool(GrepTool())
188- env.add_tool(GlobTool())
189- env.add_tool(ListTool())
284+ env.add_tool(BashTool(timeout=600.0 ))
285+ env.add_tool(ScopedEditTool(base_path=AGENT_WORKDIR ))
286+ env.add_tool(ReadTool(base_path=AGENT_WORKDIR ))
287+ env.add_tool(GrepTool(base_path=AGENT_WORKDIR ))
288+ env.add_tool(GlobTool(base_path=AGENT_WORKDIR ))
289+ env.add_tool(ListTool(base_path=AGENT_WORKDIR ))
190290
191291'''
192292
@@ -208,7 +308,7 @@ async def run_task(task_id: TaskId):
208308_SCENARIO_BODY = '''\
209309 """Run a Harbor task by ID.
210310
211- Reads /harbor/tasks/<task_id>/ instruction.md as the prompt.
311+ Reads the root-only task bundle's instruction.md as the prompt.
212312 After the agent works, runs tests/test.sh and parses
213313 /logs/verifier/reward.txt or reward.json for the reward.
214314 """
@@ -228,6 +328,11 @@ async def run_task(task_id: TaskId):
228328 # Ensure log output directory exists
229329 logs_dir = Path("/logs/verifier")
230330 logs_dir.mkdir(parents=True, exist_ok=True)
331+ for reward_file in (Path("/logs/verifier/reward.txt"), Path("/logs/verifier/reward.json")):
332+ try:
333+ reward_file.unlink(missing_ok=True)
334+ except OSError as exc:
335+ LOGGER.warning("Failed to clear stale reward file %s: %s", reward_file, exc)
231336
232337 # Harbor mounts the task's tests/ directory at /tests/ — replicate that
233338 tests_link = Path("/tests")
@@ -243,7 +348,7 @@ async def run_task(task_id: TaskId):
243348 try:
244349 result = subprocess.run(
245350 ["bash", str(test_script)],
246- cwd="/app",
351+ cwd=AGENT_WORKDIR if Path(AGENT_WORKDIR).is_dir() else "/app",
247352 capture_output=True,
248353 text=True,
249354 timeout={verifier_timeout},
@@ -303,6 +408,7 @@ def _build_env_py(
303408 source_path : str ,
304409 task_ids : list [str ],
305410 verifier_timeout : int ,
411+ agent_workdir : str ,
306412) -> str :
307413 """Build the env.py content, adapting the scenario signature to task count."""
308414 if len (task_ids ) == 1 :
@@ -318,6 +424,7 @@ def _build_env_py(
318424 source_path = source_path ,
319425 task_count = len (task_ids ),
320426 extra_imports = extra_imports ,
427+ agent_workdir = agent_workdir ,
321428 )
322429 body = _SCENARIO_BODY .format (verifier_timeout = verifier_timeout )
323430 return header + scenario + body
@@ -327,6 +434,14 @@ def _build_env_py(
327434# Shared snippet: install uv standalone (works on any base image with curl or
328435# apt), then use uv to bootstrap Python and sync dependencies.
329436_HUD_LAYER = """\
437+ USER root
438+ # HUD coding subprocesses run as uid/gid 1000, so let them edit the original
439+ # challenge tree while keeping scenario-only task data outside that tree.
440+ RUN agent_workdir={agent_workdir_shell} \\
441+ && eval "agent_workdir=\\ "$agent_workdir\\ "" \\
442+ && mkdir -p /workspace /app \\
443+ && if [ -d "$agent_workdir" ]; then chmod -R a+rwX "$agent_workdir"; fi
444+
330445# ============================================================
331446# HUD MCP server layer
332447# ============================================================
@@ -341,19 +456,22 @@ def _build_env_py(
341456ENV PATH="/root/.local/bin:$PATH"
342457
343458COPY pyproject.toml uv.lock* ./
344- RUN uv sync --frozen --no-dev --no-install-project 2>/dev/null || \\
345- uv sync --no-dev --no-install-project
459+ RUN uv sync --frozen --no-dev --no-install-project --python 3.12 2>/dev/null || \\
460+ uv sync --no-dev --no-install-project --python 3.12
461+ ENV PATH="/hud/.venv/bin:$PATH"
346462
347- # Harbor task data (instructions + test scripts baked into image)
348- COPY tasks/ /harbor/tasks/
463+ # The scenario reads task data directly from a root-only bundle. The agent only
464+ # receives the yielded prompt and task workdir files.
465+ COPY tasks/ /root/.hud_harbor/tasks/
466+ RUN chown -R root:root /root/.hud_harbor && chmod -R go-rwx /root/.hud_harbor
349467
350468# Ensure standard directories exist and are writable at runtime
351469# (MCP server may run as non-root; Harbor tasks expect /app writable)
352470RUN mkdir -p /logs/verifier /workspace /app && chmod 777 /logs/verifier /workspace /app
353471
354472COPY env.py ./
355473
356- CMD ["uv", "run", "--no-project", "python", "-m", " hud", "dev", "env:env", "--stdio"]
474+ CMD ["hud", "dev", "env:env", "--stdio"]
357475"""
358476
359477DOCKERFILE_WITH_BASE_TEMPLATE = (
@@ -457,6 +575,7 @@ def convert(self, path: Path) -> ConvertResult:
457575 # Generate environments and taskset
458576 environments : list [GeneratedEnvironment ] = []
459577 taskset : list [dict [str , Any ]] = []
578+ used_slugs : set [str ] = set ()
460579 base_name = f"hud-harbor-{ _normalize_name (dataset_name )} "
461580
462581 # Sort groups by size (largest first) for consistent naming
@@ -470,6 +589,13 @@ def convert(self, path: Path) -> ConvertResult:
470589 rep_task = group_tasks [0 ]
471590 env_dir = rep_task .directory / "environment"
472591 dockerfile_content = _find_dockerfile (env_dir ) if env_dir .exists () else None
592+ agent_workdir = _extract_workdir (dockerfile_content or "" )
593+ env_cfg = rep_task .config .get ("environment" , {})
594+ if isinstance (env_cfg , dict ):
595+ configured_workdir = env_cfg .get ("workdir" )
596+ if isinstance (configured_workdir , str ) and configured_workdir :
597+ agent_workdir = configured_workdir
598+ agent_workdir_shell = shlex .quote (agent_workdir )
473599
474600 # Extract verifier timeout from config
475601 verifier_timeout = 600
@@ -487,6 +613,7 @@ def convert(self, path: Path) -> ConvertResult:
487613 source_path = path .as_posix (),
488614 task_ids = task_ids ,
489615 verifier_timeout = verifier_timeout ,
616+ agent_workdir = agent_workdir ,
490617 )
491618
492619 # --- Generate Dockerfile.hud ---
@@ -495,9 +622,12 @@ def convert(self, path: Path) -> ConvertResult:
495622 dockerfile = DOCKERFILE_WITH_BASE_TEMPLATE .format (
496623 source = env_dir .as_posix (),
497624 base_dockerfile = adapted ,
625+ agent_workdir_shell = agent_workdir_shell ,
498626 )
499627 else :
500- dockerfile = DOCKERFILE_FALLBACK_TEMPLATE
628+ dockerfile = DOCKERFILE_FALLBACK_TEMPLATE .format (
629+ agent_workdir_shell = agent_workdir_shell ,
630+ )
501631
502632 # --- Generate pyproject.toml ---
503633 pyproject = PYPROJECT_TEMPLATE .format (name = env_name )
@@ -532,10 +662,13 @@ def convert(self, path: Path) -> ConvertResult:
532662
533663 taskset .append (
534664 {
665+ "slug" : _make_task_slug (task .task_id , used_slugs ),
535666 "env" : {"name" : env_name },
536667 "scenario" : f"{ env_name } :run-task" ,
537668 "args" : {"task_id" : task .task_id },
538669 "metadata" : metadata ,
670+ "agent_config" : {"append_setup_output" : False },
671+ "validation" : None ,
539672 }
540673 )
541674
0 commit comments