Skip to content

Commit 30c785c

Browse files
sjarmak and claude committed
fix: add /workspace symlink to sg_only Dockerfiles for all 9 prove tasks
All 9 regression-prove tasks in ccb_debug had `ln -sf /app /workspace` in their baseline Dockerfiles (needed so that PATCH_APPLY_DIR=/workspace resolves to the actual codebase in /app), but the symlink was missing from Dockerfile.sg_only.

Root cause of MCP scoring 0.00 on 8 prove tasks (BL=0.50):
- In sg_only mode, /workspace did not exist and the claude user couldn't create it
- The agent correctly wrote regression_test.py to /app/ instead
- find_and_prove_verifier.sh looked for AGENT_TEST_PATH=/workspace/regression_test.*
- The verifier always hit "file not found" → scored 0.0

Fix: add `RUN ln -sf /app /workspace || true` to all 9 Dockerfile.sg_only files, matching what the baseline Dockerfiles already had.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 0fd63fe commit 30c785c

File tree

9 files changed

+36
-0
lines changed

9 files changed

+36
-0
lines changed

benchmarks/ccb_debug/ansible-galaxy-tar-regression-prove-001/environment/Dockerfile.sg_only

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ RUN echo '{"workdir":"/app","repos":[{"mirror":"sg-evals/ansible--b2a289dc","tar
2727
# Mark sg_only mode
2828
RUN touch /tmp/.sg_only_mode && echo '/app' > /tmp/.sg_only_workdir
2929

30+
# Symlink /workspace -> /app so verifier PATCH_APPLY_DIR works
31+
RUN ln -sf /app /workspace || true
32+
33+
3034
# Pre-create claude user and set ownership at build time.
3135
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
3236
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true

benchmarks/ccb_debug/flipt-auth-cookie-regression-prove-001/environment/Dockerfile.sg_only

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ RUN echo '{"workdir":"/app","repos":[{"mirror":"sg-evals/flipt--3d5a345f","targe
2525
# Mark sg_only mode
2626
RUN touch /tmp/.sg_only_mode && echo '/app' > /tmp/.sg_only_workdir
2727

28+
# Symlink /workspace -> /app so verifier PATCH_APPLY_DIR works
29+
RUN ln -sf /app /workspace || true
30+
31+
2832
# Pre-create claude user and set ownership at build time.
2933
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
3034
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true

benchmarks/ccb_debug/qutebrowser-adblock-cache-regression-prove-001/environment/Dockerfile.sg_only

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ RUN echo '{"workdir":"/app","repos":[{"mirror":"sg-evals/qutebrowser--6dd402c0",
2727
# Mark sg_only mode
2828
RUN touch /tmp/.sg_only_mode && echo '/app' > /tmp/.sg_only_workdir
2929

30+
# Symlink /workspace -> /app so verifier PATCH_APPLY_DIR works
31+
RUN ln -sf /app /workspace || true
32+
33+
3034
# Pre-create claude user and set ownership at build time.
3135
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
3236
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true

benchmarks/ccb_debug/qutebrowser-darkmode-threshold-regression-prove-001/environment/Dockerfile.sg_only

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ RUN echo '{"workdir":"/app","repos":[{"mirror":"sg-evals/qutebrowser--50efac08",
2727
# Mark sg_only mode
2828
RUN touch /tmp/.sg_only_mode && echo '/app' > /tmp/.sg_only_workdir
2929

30+
# Symlink /workspace -> /app so verifier PATCH_APPLY_DIR works
31+
RUN ln -sf /app /workspace || true
32+
33+
3034
# Pre-create claude user and set ownership at build time.
3135
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
3236
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true

benchmarks/ccb_debug/qutebrowser-hsv-color-regression-prove-001/environment/Dockerfile.sg_only

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ RUN echo '{"workdir":"/app","repos":[{"mirror":"sg-evals/qutebrowser--6b320dc1",
2727
# Mark sg_only mode
2828
RUN touch /tmp/.sg_only_mode && echo '/app' > /tmp/.sg_only_workdir
2929

30+
# Symlink /workspace -> /app so verifier PATCH_APPLY_DIR works
31+
RUN ln -sf /app /workspace || true
32+
33+
3034
# Pre-create claude user and set ownership at build time.
3135
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
3236
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true

benchmarks/ccb_debug/qutebrowser-url-regression-prove-001/environment/Dockerfile.sg_only

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ RUN echo '{"workdir":"/app","repos":[{"mirror":"sg-evals/qutebrowser--deeb15d6",
2727
# Mark sg_only mode
2828
RUN touch /tmp/.sg_only_mode && echo '/app' > /tmp/.sg_only_workdir
2929

30+
# Symlink /workspace -> /app so verifier PATCH_APPLY_DIR works
31+
RUN ln -sf /app /workspace || true
32+
33+
3034
# Pre-create claude user and set ownership at build time.
3135
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
3236
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true

benchmarks/ccb_debug/teleport-ssh-regression-prove-001/environment/Dockerfile.sg_only

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ RUN echo '{"workdir":"/app","repos":[{"mirror":"sg-evals/teleport--0415e422","ta
2525
# Mark sg_only mode
2626
RUN touch /tmp/.sg_only_mode && echo '/app' > /tmp/.sg_only_workdir
2727

28+
# Symlink /workspace -> /app so verifier PATCH_APPLY_DIR works
29+
RUN ln -sf /app /workspace || true
30+
31+
2832
# Pre-create claude user and set ownership at build time.
2933
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
3034
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true

benchmarks/ccb_debug/tutanota-search-regression-prove-001/environment/Dockerfile.sg_only

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ RUN echo '{"workdir":"/app","repos":[{"mirror":"sg-evals/tutanota--f373ac38","ta
2525
# Mark sg_only mode
2626
RUN touch /tmp/.sg_only_mode && echo '/app' > /tmp/.sg_only_workdir
2727

28+
# Symlink /workspace -> /app so verifier PATCH_APPLY_DIR works
29+
RUN ln -sf /app /workspace || true
30+
31+
2832
# Pre-create claude user and set ownership at build time.
2933
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
3034
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true

benchmarks/ccb_debug/vuls-oval-regression-prove-001/environment/Dockerfile.sg_only

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ RUN echo '{"workdir":"/app","repos":[{"mirror":"sg-evals/vuls--139f3a81","target
2525
# Mark sg_only mode
2626
RUN touch /tmp/.sg_only_mode && echo '/app' > /tmp/.sg_only_workdir
2727

28+
# Symlink /workspace -> /app so verifier PATCH_APPLY_DIR works
29+
RUN ln -sf /app /workspace || true
30+
31+
2832
# Pre-create claude user and set ownership at build time.
2933
RUN (adduser --disabled-password --gecos '' claude 2>/dev/null || true) && \
3034
for d in /workspace /app /testbed /logs; do [ -d "$d" ] && chown -R claude:claude "$d"; done || true

0 commit comments

Comments
 (0)