Skip to content

Commit 3c652e8

Browse files
sjarmakclaude
andcommitted
Fix sg_only verifier wrapper for SWE-bench Pro tasks + add OpenHands verification subset
sgonly_verifier_wrapper.sh: Add _sg_only_post_restore() helper that runs after repo restore but BEFORE agent overlay: - Configures git safe.directory for /app, /workspace, /testbed - Reads before_repo_set_cmd from /tests/config.json (SWE-bench Pro) and executes it to establish correct base commit + test file state This fixes element-web-unread-indicators-diverge-fix-001 which scored 0.0 because the test patch for threads.ts failed to apply — the sg_only restore didn't run before_repo_set_cmd to checkout the right commit. Also adds configs/openhands_verification_subset_20260309.json covering all 10 verifier families (checklist, continuous, diff_similarity, f1, f1_hybrid, ir_checklist, oracle_checks, repo_state_heuristic, semantic_similarity, test_ratio) for harness contract verification. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 949e27e commit 3c652e8

File tree

476 files changed

+21908
-950
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

476 files changed

+21908
-950
lines changed

benchmarks/backups/csb_org_compliance/ccx-compliance-051/tests/sgonly_verifier_wrapper.sh

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,40 @@ overlay_agent_files() {
7777
echo "[sg_only_verifier] Overlaid agent changes onto $targetdir"
7878
}
7979

80+
# ---------------------------------------------------------------------------
81+
# Helper: post-restore steps (safe.directory + before_repo_set_cmd)
82+
# ---------------------------------------------------------------------------
83+
_sg_only_post_restore() {
  # Post-restore setup: registers git safe.directory entries and runs the
  # optional before_repo_set_cmd found in the task's config JSON.
  #
  # Globals:   BEFORE_CMD (written) - command string read from the config.
  #            SG_ONLY_CONFIG_JSON (read, optional) - overrides the config
  #            path; defaults to /tests/config.json.
  # Arguments: $1 - working directory of the restored repository.
  # Outputs:   progress messages plus the first 20 lines of the command's
  #            combined stdout/stderr, on stdout.
  # Returns:   1 if $1 cannot be entered before running the command,
  #            0 otherwise.
  # Side effect: leaves the current directory at $1 when a command is run.
  local workdir="$1"
  local config_json="${SG_ONLY_CONFIG_JSON:-/tests/config.json}"
  local d

  # Best-effort: failures (git missing, unwritable global config) are
  # deliberately ignored.
  git config --global --add safe.directory "$workdir" 2>/dev/null || true
  for d in /app /workspace /testbed; do
    if [ "$d" != "$workdir" ]; then
      git config --global --add safe.directory "$d" 2>/dev/null || true
    fi
  done

  # SWE-bench Pro tasks store a before_repo_set_cmd in the config JSON
  # that must run after restore to set up the correct base commit and
  # checkout specific test files from a different commit.
  if [ ! -f "$config_json" ]; then
    return 0
  fi

  # The path is passed via argv instead of being spliced into the Python
  # source. Unreadable/invalid JSON or a non-string value yields an empty
  # command, which skips the step below.
  BEFORE_CMD=$(python3 -c '
import json, sys
try:
    with open(sys.argv[1]) as fh:
        cmd = json.load(fh).get("before_repo_set_cmd", "")
    print(cmd if isinstance(cmd, str) else "")
except Exception:
    print("")
' "$config_json" 2>/dev/null)

  if [ -n "$BEFORE_CMD" ]; then
    echo "[sg_only_verifier] Running before_repo_set_cmd from config.json"
    # Never run reset/clean style commands in the wrong directory.
    if ! cd "$workdir"; then
      echo "[sg_only_verifier] ERROR: cannot cd to $workdir; skipping before_repo_set_cmd" >&2
      return 1
    fi
    # NOTE(review): eval executes the config value verbatim; this assumes
    # config.json is trusted task metadata - confirm.
    # Only the first 20 lines are shown, but the rest of the output is
    # drained so the command is not killed by SIGPIPE once head exits.
    eval "$BEFORE_CMD" 2>&1 | { head -n 20; cat >/dev/null; }
    echo "[sg_only_verifier] before_repo_set_cmd complete"
  fi
}
113+
80114
# ---------------------------------------------------------------------------
81115
# PRIMARY PATH: clone manifest
82116
# ---------------------------------------------------------------------------
@@ -155,7 +189,12 @@ if [ -f "$MANIFEST" ]; then
155189
echo "[sg_only_verifier] Defect injection complete"
156190
fi
157191

158-
# 4. Overlay agent changes
192+
# 4. Post-restore: safe.directory + before_repo_set_cmd
193+
# Must run BEFORE agent overlay so git reset/clean from
194+
# before_repo_set_cmd doesn't wipe agent changes.
195+
_sg_only_post_restore "$WORKDIR"
196+
197+
# 5. Overlay agent changes (on top of the correct base commit)
159198
overlay_agent_files "$WORKDIR"
160199

161200
# Return to working directory
@@ -186,7 +225,12 @@ backup_agent_files "$WORKDIR"
186225
rsync -a --delete /repo_full/ "$WORKDIR/"
187226
echo "[sg_only_verifier] Restored full repo from /repo_full/"
188227

189-
# 3. Overlay agent's changes
228+
# 3. Post-restore: safe.directory + before_repo_set_cmd
229+
# Must run BEFORE agent overlay so git reset/clean from
230+
# before_repo_set_cmd doesn't wipe agent changes.
231+
_sg_only_post_restore "$WORKDIR"
232+
233+
# 4. Overlay agent's changes (on top of the correct base commit)
190234
overlay_agent_files "$WORKDIR"
191235

192236
# Return to working directory

benchmarks/backups/csb_org_compliance/ccx-compliance-115/tests/sgonly_verifier_wrapper.sh

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,40 @@ overlay_agent_files() {
7777
echo "[sg_only_verifier] Overlaid agent changes onto $targetdir"
7878
}
7979

80+
# ---------------------------------------------------------------------------
81+
# Helper: post-restore steps (safe.directory + before_repo_set_cmd)
82+
# ---------------------------------------------------------------------------
83+
_sg_only_post_restore() {
  # Post-restore setup: registers git safe.directory entries and runs the
  # optional before_repo_set_cmd found in the task's config JSON.
  #
  # Globals:   BEFORE_CMD (written) - command string read from the config.
  #            SG_ONLY_CONFIG_JSON (read, optional) - overrides the config
  #            path; defaults to /tests/config.json.
  # Arguments: $1 - working directory of the restored repository.
  # Outputs:   progress messages plus the first 20 lines of the command's
  #            combined stdout/stderr, on stdout.
  # Returns:   1 if $1 cannot be entered before running the command,
  #            0 otherwise.
  # Side effect: leaves the current directory at $1 when a command is run.
  local workdir="$1"
  local config_json="${SG_ONLY_CONFIG_JSON:-/tests/config.json}"
  local d

  # Best-effort: failures (git missing, unwritable global config) are
  # deliberately ignored.
  git config --global --add safe.directory "$workdir" 2>/dev/null || true
  for d in /app /workspace /testbed; do
    if [ "$d" != "$workdir" ]; then
      git config --global --add safe.directory "$d" 2>/dev/null || true
    fi
  done

  # SWE-bench Pro tasks store a before_repo_set_cmd in the config JSON
  # that must run after restore to set up the correct base commit and
  # checkout specific test files from a different commit.
  if [ ! -f "$config_json" ]; then
    return 0
  fi

  # The path is passed via argv instead of being spliced into the Python
  # source. Unreadable/invalid JSON or a non-string value yields an empty
  # command, which skips the step below.
  BEFORE_CMD=$(python3 -c '
import json, sys
try:
    with open(sys.argv[1]) as fh:
        cmd = json.load(fh).get("before_repo_set_cmd", "")
    print(cmd if isinstance(cmd, str) else "")
except Exception:
    print("")
' "$config_json" 2>/dev/null)

  if [ -n "$BEFORE_CMD" ]; then
    echo "[sg_only_verifier] Running before_repo_set_cmd from config.json"
    # Never run reset/clean style commands in the wrong directory.
    if ! cd "$workdir"; then
      echo "[sg_only_verifier] ERROR: cannot cd to $workdir; skipping before_repo_set_cmd" >&2
      return 1
    fi
    # NOTE(review): eval executes the config value verbatim; this assumes
    # config.json is trusted task metadata - confirm.
    # Only the first 20 lines are shown, but the rest of the output is
    # drained so the command is not killed by SIGPIPE once head exits.
    eval "$BEFORE_CMD" 2>&1 | { head -n 20; cat >/dev/null; }
    echo "[sg_only_verifier] before_repo_set_cmd complete"
  fi
}
113+
80114
# ---------------------------------------------------------------------------
81115
# PRIMARY PATH: clone manifest
82116
# ---------------------------------------------------------------------------
@@ -155,7 +189,12 @@ if [ -f "$MANIFEST" ]; then
155189
echo "[sg_only_verifier] Defect injection complete"
156190
fi
157191

158-
# 4. Overlay agent changes
192+
# 4. Post-restore: safe.directory + before_repo_set_cmd
193+
# Must run BEFORE agent overlay so git reset/clean from
194+
# before_repo_set_cmd doesn't wipe agent changes.
195+
_sg_only_post_restore "$WORKDIR"
196+
197+
# 5. Overlay agent changes (on top of the correct base commit)
159198
overlay_agent_files "$WORKDIR"
160199

161200
# Return to working directory
@@ -186,7 +225,12 @@ backup_agent_files "$WORKDIR"
186225
rsync -a --delete /repo_full/ "$WORKDIR/"
187226
echo "[sg_only_verifier] Restored full repo from /repo_full/"
188227

189-
# 3. Overlay agent's changes
228+
# 3. Post-restore: safe.directory + before_repo_set_cmd
229+
# Must run BEFORE agent overlay so git reset/clean from
230+
# before_repo_set_cmd doesn't wipe agent changes.
231+
_sg_only_post_restore "$WORKDIR"
232+
233+
# 4. Overlay agent's changes (on top of the correct base commit)
190234
overlay_agent_files "$WORKDIR"
191235

192236
# Return to working directory

benchmarks/backups/csb_org_compliance/ccx-compliance-118/tests/sgonly_verifier_wrapper.sh

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,40 @@ overlay_agent_files() {
7777
echo "[sg_only_verifier] Overlaid agent changes onto $targetdir"
7878
}
7979

80+
# ---------------------------------------------------------------------------
81+
# Helper: post-restore steps (safe.directory + before_repo_set_cmd)
82+
# ---------------------------------------------------------------------------
83+
_sg_only_post_restore() {
  # Post-restore setup: registers git safe.directory entries and runs the
  # optional before_repo_set_cmd found in the task's config JSON.
  #
  # Globals:   BEFORE_CMD (written) - command string read from the config.
  #            SG_ONLY_CONFIG_JSON (read, optional) - overrides the config
  #            path; defaults to /tests/config.json.
  # Arguments: $1 - working directory of the restored repository.
  # Outputs:   progress messages plus the first 20 lines of the command's
  #            combined stdout/stderr, on stdout.
  # Returns:   1 if $1 cannot be entered before running the command,
  #            0 otherwise.
  # Side effect: leaves the current directory at $1 when a command is run.
  local workdir="$1"
  local config_json="${SG_ONLY_CONFIG_JSON:-/tests/config.json}"
  local d

  # Best-effort: failures (git missing, unwritable global config) are
  # deliberately ignored.
  git config --global --add safe.directory "$workdir" 2>/dev/null || true
  for d in /app /workspace /testbed; do
    if [ "$d" != "$workdir" ]; then
      git config --global --add safe.directory "$d" 2>/dev/null || true
    fi
  done

  # SWE-bench Pro tasks store a before_repo_set_cmd in the config JSON
  # that must run after restore to set up the correct base commit and
  # checkout specific test files from a different commit.
  if [ ! -f "$config_json" ]; then
    return 0
  fi

  # The path is passed via argv instead of being spliced into the Python
  # source. Unreadable/invalid JSON or a non-string value yields an empty
  # command, which skips the step below.
  BEFORE_CMD=$(python3 -c '
import json, sys
try:
    with open(sys.argv[1]) as fh:
        cmd = json.load(fh).get("before_repo_set_cmd", "")
    print(cmd if isinstance(cmd, str) else "")
except Exception:
    print("")
' "$config_json" 2>/dev/null)

  if [ -n "$BEFORE_CMD" ]; then
    echo "[sg_only_verifier] Running before_repo_set_cmd from config.json"
    # Never run reset/clean style commands in the wrong directory.
    if ! cd "$workdir"; then
      echo "[sg_only_verifier] ERROR: cannot cd to $workdir; skipping before_repo_set_cmd" >&2
      return 1
    fi
    # NOTE(review): eval executes the config value verbatim; this assumes
    # config.json is trusted task metadata - confirm.
    # Only the first 20 lines are shown, but the rest of the output is
    # drained so the command is not killed by SIGPIPE once head exits.
    eval "$BEFORE_CMD" 2>&1 | { head -n 20; cat >/dev/null; }
    echo "[sg_only_verifier] before_repo_set_cmd complete"
  fi
}
113+
80114
# ---------------------------------------------------------------------------
81115
# PRIMARY PATH: clone manifest
82116
# ---------------------------------------------------------------------------
@@ -155,7 +189,12 @@ if [ -f "$MANIFEST" ]; then
155189
echo "[sg_only_verifier] Defect injection complete"
156190
fi
157191

158-
# 4. Overlay agent changes
192+
# 4. Post-restore: safe.directory + before_repo_set_cmd
193+
# Must run BEFORE agent overlay so git reset/clean from
194+
# before_repo_set_cmd doesn't wipe agent changes.
195+
_sg_only_post_restore "$WORKDIR"
196+
197+
# 5. Overlay agent changes (on top of the correct base commit)
159198
overlay_agent_files "$WORKDIR"
160199

161200
# Return to working directory
@@ -186,7 +225,12 @@ backup_agent_files "$WORKDIR"
186225
rsync -a --delete /repo_full/ "$WORKDIR/"
187226
echo "[sg_only_verifier] Restored full repo from /repo_full/"
188227

189-
# 3. Overlay agent's changes
228+
# 3. Post-restore: safe.directory + before_repo_set_cmd
229+
# Must run BEFORE agent overlay so git reset/clean from
230+
# before_repo_set_cmd doesn't wipe agent changes.
231+
_sg_only_post_restore "$WORKDIR"
232+
233+
# 4. Overlay agent's changes (on top of the correct base commit)
190234
overlay_agent_files "$WORKDIR"
191235

192236
# Return to working directory

benchmarks/backups/csb_org_compliance/ccx-compliance-185/tests/sgonly_verifier_wrapper.sh

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,40 @@ overlay_agent_files() {
7777
echo "[sg_only_verifier] Overlaid agent changes onto $targetdir"
7878
}
7979

80+
# ---------------------------------------------------------------------------
81+
# Helper: post-restore steps (safe.directory + before_repo_set_cmd)
82+
# ---------------------------------------------------------------------------
83+
_sg_only_post_restore() {
  # Post-restore setup: registers git safe.directory entries and runs the
  # optional before_repo_set_cmd found in the task's config JSON.
  #
  # Globals:   BEFORE_CMD (written) - command string read from the config.
  #            SG_ONLY_CONFIG_JSON (read, optional) - overrides the config
  #            path; defaults to /tests/config.json.
  # Arguments: $1 - working directory of the restored repository.
  # Outputs:   progress messages plus the first 20 lines of the command's
  #            combined stdout/stderr, on stdout.
  # Returns:   1 if $1 cannot be entered before running the command,
  #            0 otherwise.
  # Side effect: leaves the current directory at $1 when a command is run.
  local workdir="$1"
  local config_json="${SG_ONLY_CONFIG_JSON:-/tests/config.json}"
  local d

  # Best-effort: failures (git missing, unwritable global config) are
  # deliberately ignored.
  git config --global --add safe.directory "$workdir" 2>/dev/null || true
  for d in /app /workspace /testbed; do
    if [ "$d" != "$workdir" ]; then
      git config --global --add safe.directory "$d" 2>/dev/null || true
    fi
  done

  # SWE-bench Pro tasks store a before_repo_set_cmd in the config JSON
  # that must run after restore to set up the correct base commit and
  # checkout specific test files from a different commit.
  if [ ! -f "$config_json" ]; then
    return 0
  fi

  # The path is passed via argv instead of being spliced into the Python
  # source. Unreadable/invalid JSON or a non-string value yields an empty
  # command, which skips the step below.
  BEFORE_CMD=$(python3 -c '
import json, sys
try:
    with open(sys.argv[1]) as fh:
        cmd = json.load(fh).get("before_repo_set_cmd", "")
    print(cmd if isinstance(cmd, str) else "")
except Exception:
    print("")
' "$config_json" 2>/dev/null)

  if [ -n "$BEFORE_CMD" ]; then
    echo "[sg_only_verifier] Running before_repo_set_cmd from config.json"
    # Never run reset/clean style commands in the wrong directory.
    if ! cd "$workdir"; then
      echo "[sg_only_verifier] ERROR: cannot cd to $workdir; skipping before_repo_set_cmd" >&2
      return 1
    fi
    # NOTE(review): eval executes the config value verbatim; this assumes
    # config.json is trusted task metadata - confirm.
    # Only the first 20 lines are shown, but the rest of the output is
    # drained so the command is not killed by SIGPIPE once head exits.
    eval "$BEFORE_CMD" 2>&1 | { head -n 20; cat >/dev/null; }
    echo "[sg_only_verifier] before_repo_set_cmd complete"
  fi
}
113+
80114
# ---------------------------------------------------------------------------
81115
# PRIMARY PATH: clone manifest
82116
# ---------------------------------------------------------------------------
@@ -155,7 +189,12 @@ if [ -f "$MANIFEST" ]; then
155189
echo "[sg_only_verifier] Defect injection complete"
156190
fi
157191

158-
# 4. Overlay agent changes
192+
# 4. Post-restore: safe.directory + before_repo_set_cmd
193+
# Must run BEFORE agent overlay so git reset/clean from
194+
# before_repo_set_cmd doesn't wipe agent changes.
195+
_sg_only_post_restore "$WORKDIR"
196+
197+
# 5. Overlay agent changes (on top of the correct base commit)
159198
overlay_agent_files "$WORKDIR"
160199

161200
# Return to working directory
@@ -186,7 +225,12 @@ backup_agent_files "$WORKDIR"
186225
rsync -a --delete /repo_full/ "$WORKDIR/"
187226
echo "[sg_only_verifier] Restored full repo from /repo_full/"
188227

189-
# 3. Overlay agent's changes
228+
# 3. Post-restore: safe.directory + before_repo_set_cmd
229+
# Must run BEFORE agent overlay so git reset/clean from
230+
# before_repo_set_cmd doesn't wipe agent changes.
231+
_sg_only_post_restore "$WORKDIR"
232+
233+
# 4. Overlay agent's changes (on top of the correct base commit)
190234
overlay_agent_files "$WORKDIR"
191235

192236
# Return to working directory

benchmarks/backups/csb_org_compliance/ccx-compliance-186/tests/sgonly_verifier_wrapper.sh

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,40 @@ overlay_agent_files() {
7777
echo "[sg_only_verifier] Overlaid agent changes onto $targetdir"
7878
}
7979

80+
# ---------------------------------------------------------------------------
81+
# Helper: post-restore steps (safe.directory + before_repo_set_cmd)
82+
# ---------------------------------------------------------------------------
83+
_sg_only_post_restore() {
  # Post-restore setup: registers git safe.directory entries and runs the
  # optional before_repo_set_cmd found in the task's config JSON.
  #
  # Globals:   BEFORE_CMD (written) - command string read from the config.
  #            SG_ONLY_CONFIG_JSON (read, optional) - overrides the config
  #            path; defaults to /tests/config.json.
  # Arguments: $1 - working directory of the restored repository.
  # Outputs:   progress messages plus the first 20 lines of the command's
  #            combined stdout/stderr, on stdout.
  # Returns:   1 if $1 cannot be entered before running the command,
  #            0 otherwise.
  # Side effect: leaves the current directory at $1 when a command is run.
  local workdir="$1"
  local config_json="${SG_ONLY_CONFIG_JSON:-/tests/config.json}"
  local d

  # Best-effort: failures (git missing, unwritable global config) are
  # deliberately ignored.
  git config --global --add safe.directory "$workdir" 2>/dev/null || true
  for d in /app /workspace /testbed; do
    if [ "$d" != "$workdir" ]; then
      git config --global --add safe.directory "$d" 2>/dev/null || true
    fi
  done

  # SWE-bench Pro tasks store a before_repo_set_cmd in the config JSON
  # that must run after restore to set up the correct base commit and
  # checkout specific test files from a different commit.
  if [ ! -f "$config_json" ]; then
    return 0
  fi

  # The path is passed via argv instead of being spliced into the Python
  # source. Unreadable/invalid JSON or a non-string value yields an empty
  # command, which skips the step below.
  BEFORE_CMD=$(python3 -c '
import json, sys
try:
    with open(sys.argv[1]) as fh:
        cmd = json.load(fh).get("before_repo_set_cmd", "")
    print(cmd if isinstance(cmd, str) else "")
except Exception:
    print("")
' "$config_json" 2>/dev/null)

  if [ -n "$BEFORE_CMD" ]; then
    echo "[sg_only_verifier] Running before_repo_set_cmd from config.json"
    # Never run reset/clean style commands in the wrong directory.
    if ! cd "$workdir"; then
      echo "[sg_only_verifier] ERROR: cannot cd to $workdir; skipping before_repo_set_cmd" >&2
      return 1
    fi
    # NOTE(review): eval executes the config value verbatim; this assumes
    # config.json is trusted task metadata - confirm.
    # Only the first 20 lines are shown, but the rest of the output is
    # drained so the command is not killed by SIGPIPE once head exits.
    eval "$BEFORE_CMD" 2>&1 | { head -n 20; cat >/dev/null; }
    echo "[sg_only_verifier] before_repo_set_cmd complete"
  fi
}
113+
80114
# ---------------------------------------------------------------------------
81115
# PRIMARY PATH: clone manifest
82116
# ---------------------------------------------------------------------------
@@ -155,7 +189,12 @@ if [ -f "$MANIFEST" ]; then
155189
echo "[sg_only_verifier] Defect injection complete"
156190
fi
157191

158-
# 4. Overlay agent changes
192+
# 4. Post-restore: safe.directory + before_repo_set_cmd
193+
# Must run BEFORE agent overlay so git reset/clean from
194+
# before_repo_set_cmd doesn't wipe agent changes.
195+
_sg_only_post_restore "$WORKDIR"
196+
197+
# 5. Overlay agent changes (on top of the correct base commit)
159198
overlay_agent_files "$WORKDIR"
160199

161200
# Return to working directory
@@ -186,7 +225,12 @@ backup_agent_files "$WORKDIR"
186225
rsync -a --delete /repo_full/ "$WORKDIR/"
187226
echo "[sg_only_verifier] Restored full repo from /repo_full/"
188227

189-
# 3. Overlay agent's changes
228+
# 3. Post-restore: safe.directory + before_repo_set_cmd
229+
# Must run BEFORE agent overlay so git reset/clean from
230+
# before_repo_set_cmd doesn't wipe agent changes.
231+
_sg_only_post_restore "$WORKDIR"
232+
233+
# 4. Overlay agent's changes (on top of the correct base commit)
190234
overlay_agent_files "$WORKDIR"
191235

192236
# Return to working directory

0 commit comments

Comments
 (0)