diff --git a/examples/specs/swe-bench-pro-spacedock-codex.yaml b/examples/specs/swe-bench-pro-spacedock-codex.yaml index 48800ec1..93c2c963 100644 --- a/examples/specs/swe-bench-pro-spacedock-codex.yaml +++ b/examples/specs/swe-bench-pro-spacedock-codex.yaml @@ -14,9 +14,12 @@ agent: top_p: null seed: null solver_workflow: ./examples/solver_workflows/codex-benchmark-solver - # SWE-tuned budget — above the 1200s codex default: large repos + long - # test suites need more turns and a longer per-attempt/overall timeout. - max_turns: 400 + # SWE-tuned budget. The codex runtime does NOT honor a custom max_turns — + # it accepts only the default (200); any other value raises + # SpacedockSolverAgentError. So keep max_turns at 200 and buy the + # "large repo + long test suite" headroom via the TIMEOUTS below, which + # govern wall-clock (harbor trial config), not codex turn limits. + max_turns: 200 override_timeout_sec: 5400 max_timeout_sec: 7200 reasoning_effort: xhigh diff --git a/src/razorback/agents/_runtime/codex.py b/src/razorback/agents/_runtime/codex.py index 515f0e47..9afe6569 100644 --- a/src/razorback/agents/_runtime/codex.py +++ b/src/razorback/agents/_runtime/codex.py @@ -209,9 +209,16 @@ def _codex_kwargs(harbor_agent_kwargs: dict[str, Any]) -> dict[str, Any]: if _is_empty_noop(name, value): continue if name not in _CODEX_SUPPORTED_KWARGS: + hint = "" + if name == "max_turns": + hint = ( + " The codex runtime accepts only the default max_turns (200); " + "keep it at 200 and budget wall-clock via " + "override_timeout_sec / max_timeout_sec instead." + ) raise SpacedockSolverAgentError( "codex runtime adapter cannot honor unsupported harbor_agent_kwargs " - f"field {name!r}; refusing to silently drop it." + f"field {name!r}; refusing to silently drop it.{hint}" ) kw[name] = value return kw diff --git a/src/razorback/harbor_tasks/materialize.py b/src/razorback/harbor_tasks/materialize.py index e34568a4..5a664199 100644 --- a/src/razorback/harbor_tasks/materialize.py +++ b/src/razorback/harbor_tasks/materialize.py @@ -113,12 +113,41 @@ def _reflect_allowed_files( target.mkdir(parents=True, exist_ok=True) continue target.parent.mkdir(parents=True, exist_ok=True) - if view_mode == "copy": + # The `environment/` subtree is the Docker build context (current Harbor + # convention): `docker compose build` runs with it as the project + # directory, and BuildKit cannot read a Dockerfile (or any build input) + # that symlinks outside the context ("failed to read dockerfile: no such + # file or directory"). So always materialize the build context as real + # files, even in link mode — mirroring how `_patch_task_toml` keeps + # task.toml view-owned. Bulk task files still symlink in link mode (the + # whole point of bind: no eager duplication). + copy_real = view_mode == "copy" or _is_build_context_path(rel) + if path.is_symlink(): + # The name filter above only saw the LINK's own path. A symlink with + # an innocuous name can still point at a denied answer artifact (e.g. + # `environment/leak.patch -> ../gold.patch`); copying follows the link + # and embeds the target's bytes under an allowed view path, and a view + # symlink chases through to it too — bypassing the leakage deny + # boundary. Resolve the target and re-apply the deny check (and + # source containment) so a symlink cannot smuggle denied or + # out-of-tree content into the view. + try: + resolved_rel = path.resolve().relative_to(source).as_posix() + except ValueError: + continue # target escapes the source tree — refuse it + if matches_denied_path(resolved_rel, exclude_globs): + continue + if copy_real: shutil.copy2(path, target) else: os.symlink(path, target) +def _is_build_context_path(rel: Path) -> bool: + """True for files under the `environment/` Docker build context.""" + return rel.parts[:1] == ("environment",) + + def _patch_task_toml( task_toml: Path, *, diff --git a/tests/unit/test_harbor_task_view_materializer.py b/tests/unit/test_harbor_task_view_materializer.py index 723864e5..97e8f19e 100644 --- a/tests/unit/test_harbor_task_view_materializer.py +++ b/tests/unit/test_harbor_task_view_materializer.py @@ -109,6 +109,65 @@ def test_link_mode_symlinks_files_but_never_mutates_source_task_toml(tmp_path): assert "RAZORBACK_BENCHMARK_TASK_ID" not in (source / "task.toml").read_text() +def test_link_mode_copies_environment_build_context_as_real_files(tmp_path): + """`view_mode="link"` must keep the `environment/` Docker build context as + REAL files, not symlinks. `docker compose build` runs with the view's + `environment/` dir as the build context; BuildKit cannot read a Dockerfile + that symlinks outside the context (`failed to read dockerfile: no such file + or directory`), so a symlinked build context breaks every build-from-source + benchmark (e.g. swe-bench-pro) under the default bind/link mode. Bulk task + files still symlink — only the build context is forced real. + """ + source = _write_source_task(tmp_path) + dockerfile_text = (source / "environment" / "Dockerfile").read_text() + + view = materialize_harbor_task_view( + source_task_dir=source, + view_root=tmp_path / "views", + benchmark_kind="fixture-bench", + benchmark_task_id="task-001", + transform_name="fixture-transform", + view_mode="link", + ) + + # the Docker build context is a real, view-owned file (readable by BuildKit) + assert (view / "environment" / "Dockerfile").is_file() + assert not (view / "environment" / "Dockerfile").is_symlink() + assert (view / "environment" / "Dockerfile").read_text() == dockerfile_text + # bulk files outside environment/ still symlink — the bind/link contract holds + assert (view / "instruction.md").is_symlink() + assert (view / "data" / "input.csv").is_symlink() + + +def test_environment_symlink_to_denied_target_is_not_smuggled(tmp_path): + """A symlink under `environment/` with an innocuous name must not smuggle a + DENIED target's bytes into the view. Copying the build context follows + symlinks (shutil.copy2), and the name-based deny filter only sees the link's + own path — so `environment/leak.patch -> ../gold_patch.diff` would otherwise + embed gold-patch content under an allowed view path. The materializer must + resolve the target and re-apply the deny check, dropping it. + """ + source = _write_source_task(tmp_path) + (source / "gold_patch.diff").write_text("--- GOLD ANSWER PATCH ---\n") + (source / "environment" / "leak.patch").symlink_to(source / "gold_patch.diff") + + view = materialize_harbor_task_view( + source_task_dir=source, + view_root=tmp_path / "views", + benchmark_kind="fixture-bench", + benchmark_task_id="task-001", + transform_name="fixture-transform", + exclude_globs=("gold_patch*", "gold.patch"), + view_mode="link", + ) + + # the legit build context is still materialized as a real file … + assert (view / "environment" / "Dockerfile").is_file() + assert not (view / "environment" / "Dockerfile").is_symlink() + # … but the disguised symlink to a denied target is dropped, not embedded + assert not (view / "environment" / "leak.patch").exists() + + def test_materialized_view_is_harbor_taskconfig_path_ready(tmp_path): source = _write_source_task(tmp_path) diff --git a/tests/unit/test_runtime_adapters.py b/tests/unit/test_runtime_adapters.py index 81ec3589..458a88fb 100644 --- a/tests/unit/test_runtime_adapters.py +++ b/tests/unit/test_runtime_adapters.py @@ -501,6 +501,23 @@ def test_codex_rejects_unsupported_contract_kwargs(tmp_path, kwarg): ) +def test_codex_max_turns_rejection_is_actionable(tmp_path): + """A custom max_turns is the most common codex trip (claude honors it, codex + does not). The error must tell the user the fix: keep the default (200) and + budget wall-clock via timeouts — not leave them to reverse-engineer it. + """ + with pytest.raises(SpacedockSolverAgentError) as excinfo: + codex_adapter.build_inner_agent( + logs_dir=tmp_path, + model="gpt-5.1-codex", + harbor_agent_kwargs={"max_turns": 400}, + extra_env={"OPENAI_API_KEY": "sk-fake"}, + ) + message = str(excinfo.value) + assert "200" in message + assert "timeout" in message.lower() + + def test_pi_raises_not_implemented(tmp_path): with pytest.raises(NotImplementedError, match="pi"): pi_adapter.build_inner_agent(