Skip to content

Commit 01b8113

Browse files
sjarmak and claude committed
fix: OpenHands python3 fallback + multi-account support
- agent.py: Use $(command -v python3 || command -v python) for auth proxy startup, fixing containers that only have 'python' (e.g., kafka tasks)
- openhands_2config.sh: Enable multi-account rotation via setup_multi_accounts instead of hardcoding CLAUDE_HOMES=("$HOME"). Add per-task OAuth token extraction from account-specific credentials files.
- Add openhands_mcp_rerun.json: 30-task subset excluding 5 tasks needing 20GB storage (incompatible with Daytona 10GB limit)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 2c0667c commit 01b8113

File tree

5 files changed

+267
-6
lines changed

5 files changed

+267
-6
lines changed

agents/harnesses/openhands/agent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,11 @@ async def _configure_mcp(self, environment: BaseEnvironment) -> None:
105105
source_path=proxy_src,
106106
target_path="/tmp/sg_auth_proxy.py",
107107
)
108-
# Start proxy as background daemon
108+
# Start proxy as background daemon (try python3, fall back to python)
109109
start_cmd = (
110110
f"SG_MCP_URL={mcp_endpoint} "
111111
f"SG_MCP_TOKEN={sg_token} "
112-
f"nohup python3 /tmp/sg_auth_proxy.py --port {self._SG_PROXY_PORT} "
112+
f"nohup $(command -v python3 || command -v python) /tmp/sg_auth_proxy.py --port {self._SG_PROXY_PORT} "
113113
f"> /tmp/sg_proxy.log 2>&1 &"
114114
)
115115
await environment.exec(start_cmd)

configs/openhands_2config.sh

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,12 +160,12 @@ for row in "${TASK_ROWS[@]}"; do
160160
done
161161

162162
if [ -z "${PARALLEL_JOBS:-}" ] || [ "$PARALLEL_JOBS" -lt 1 ] 2>/dev/null; then
163-
PARALLEL_JOBS=1
163+
PARALLEL_JOBS=0 # sentinel; setup_multi_accounts will auto-set
164164
fi
165165

166-
# run_tasks_parallel expects CLAUDE_HOMES; use current HOME for OpenHands harness runs.
167-
CLAUDE_HOMES=("$HOME")
166+
# Multi-account support: rotate OAuth tokens across accounts.
168167
REAL_HOME="$HOME"
168+
setup_multi_accounts
169169

170170
_model_lower=$(echo "$MODEL" | awk -F/ '{print $NF}' | tr '[:upper:]' '[:lower:]')
171171
case "$_model_lower" in
@@ -211,6 +211,23 @@ _openhands_run_single() {
211211
local jobs_subdir="${jobs_base}/${config}"
212212
local task_path="${TASK_PATH_BY_ID[$task_id]}"
213213

214+
# Extract ANTHROPIC_API_KEY from this account's OAuth credentials.
215+
# run_tasks_parallel sets HOME=$_task_home for account rotation.
216+
if [ "$USE_SUBSCRIPTION" = "true" ]; then
217+
local _acct_token
218+
_acct_token=$(python3 -c "
219+
import json, os
220+
creds_file = os.path.join('${_task_home}', '.claude', '.credentials.json')
221+
if os.path.exists(creds_file):
222+
creds = json.load(open(creds_file))
223+
token = creds.get('claudeAiOauth', {}).get('accessToken', '')
224+
if token: print(token)
225+
" 2>/dev/null)
226+
if [ -n "$_acct_token" ]; then
227+
export ANTHROPIC_API_KEY="$_acct_token"
228+
fi
229+
fi
230+
214231
case "$mcp_type" in
215232
none|sourcegraph_full)
216233
;;

configs/openhands_mcp_rerun.json

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
{
2+
"metadata": {
3+
"name": "openhands_mcp_rerun",
4+
"description": "30-task MCP-only rerun (excluded 5 tasks needing 20GB storage)",
5+
"parent": "openhands_subset.json",
6+
"excluded_20gb": [
7+
"cilium-endpoint-manager-refac-001",
8+
"clickhouse-mergetree-arch-understand-001",
9+
"envoy-listener-manager-refac-001",
10+
"flink-window-late-data-fix-001",
11+
"pytorch-gradient-noise-feat-001"
12+
]
13+
},
14+
"tasks": [
15+
{
16+
"task_id": "kafka-batch-accumulator-refac-001",
17+
"benchmark": "csb_sdlc_refactor",
18+
"task_dir": "csb_sdlc_refactor/kafka-batch-accumulator-refac-001",
19+
"language": "java",
20+
"execution_env": "daytona"
21+
},
22+
{
23+
"task_id": "servo-scrollend-event-feat-001",
24+
"benchmark": "csb_sdlc_feature",
25+
"task_dir": "csb_sdlc_feature/servo-scrollend-event-feat-001",
26+
"language": "rust",
27+
"execution_env": "daytona"
28+
},
29+
{
30+
"task_id": "numpy-dtype-localize-001",
31+
"benchmark": "csb_sdlc_understand",
32+
"task_dir": "csb_sdlc_understand/numpy-dtype-localize-001",
33+
"language": "python",
34+
"execution_env": "daytona"
35+
},
36+
{
37+
"task_id": "cilium-ebpf-fault-qa-001",
38+
"benchmark": "csb_sdlc_understand",
39+
"task_dir": "csb_sdlc_understand/cilium-ebpf-fault-qa-001",
40+
"language": "go",
41+
"execution_env": "daytona"
42+
},
43+
{
44+
"task_id": "kafka-build-orient-001",
45+
"benchmark": "csb_sdlc_understand",
46+
"task_dir": "csb_sdlc_understand/kafka-build-orient-001",
47+
"language": "java",
48+
"execution_env": "daytona"
49+
},
50+
{
51+
"task_id": "python-http-class-naming-refac-001",
52+
"benchmark": "csb_sdlc_refactor",
53+
"task_dir": "csb_sdlc_refactor/python-http-class-naming-refac-001",
54+
"language": "python",
55+
"execution_env": "daytona"
56+
},
57+
{
58+
"task_id": "django-composite-field-recover-001",
59+
"benchmark": "csb_sdlc_understand",
60+
"task_dir": "csb_sdlc_understand/django-composite-field-recover-001",
61+
"language": "python",
62+
"execution_env": "daytona"
63+
},
64+
{
65+
"task_id": "envoy-routeconfig-dep-chain-001",
66+
"benchmark": "csb_sdlc_design",
67+
"task_dir": "csb_sdlc_design/envoy-routeconfig-dep-chain-001",
68+
"language": "go",
69+
"execution_env": "daytona"
70+
},
71+
{
72+
"task_id": "django-select-for-update-fix-001",
73+
"benchmark": "csb_sdlc_fix",
74+
"task_dir": "csb_sdlc_fix/django-select-for-update-fix-001",
75+
"language": "python",
76+
"execution_env": "daytona"
77+
},
78+
{
79+
"task_id": "terraform-plan-null-unknown-fix-001",
80+
"benchmark": "csb_sdlc_fix",
81+
"task_dir": "csb_sdlc_fix/terraform-plan-null-unknown-fix-001",
82+
"language": "go",
83+
"execution_env": "daytona"
84+
},
85+
{
86+
"task_id": "ccx-migration-027",
87+
"benchmark": "csb_org_migration",
88+
"task_dir": "csb_org_migration/ccx-migration-027",
89+
"language": "javascript",
90+
"execution_env": "daytona"
91+
},
92+
{
93+
"task_id": "CCX-crossorg-121",
94+
"benchmark": "csb_org_crossorg",
95+
"task_dir": "csb_org_crossorg/ccx-crossorg-121",
96+
"language": "cpp",
97+
"execution_env": "daytona"
98+
},
99+
{
100+
"task_id": "CCX-crossorg-132",
101+
"benchmark": "csb_org_crossorg",
102+
"task_dir": "csb_org_crossorg/ccx-crossorg-132",
103+
"language": "rust",
104+
"execution_env": "daytona"
105+
},
106+
{
107+
"task_id": "CCX-dep-trace-133",
108+
"benchmark": "csb_org_crossrepo_tracing",
109+
"task_dir": "csb_org_crossrepo_tracing/ccx-dep-trace-133",
110+
"language": "cpp",
111+
"execution_env": "daytona"
112+
},
113+
{
114+
"task_id": "CCX-domain-137",
115+
"benchmark": "csb_org_domain",
116+
"task_dir": "csb_org_domain/ccx-domain-137",
117+
"language": "java",
118+
"execution_env": "daytona"
119+
},
120+
{
121+
"task_id": "CCX-domain-156",
122+
"benchmark": "csb_org_domain",
123+
"task_dir": "csb_org_domain/ccx-domain-156",
124+
"language": "go",
125+
"execution_env": "daytona"
126+
},
127+
{
128+
"task_id": "CCX-dep-trace-181",
129+
"benchmark": "csb_org_crossrepo_tracing",
130+
"task_dir": "csb_org_crossrepo_tracing/ccx-dep-trace-181",
131+
"language": "java",
132+
"execution_env": "daytona"
133+
},
134+
{
135+
"task_id": "CCX-migration-195",
136+
"benchmark": "csb_org_migration",
137+
"task_dir": "csb_org_migration/ccx-migration-195",
138+
"language": "java",
139+
"execution_env": "daytona"
140+
},
141+
{
142+
"task_id": "CCX-migration-204",
143+
"benchmark": "csb_org_migration",
144+
"task_dir": "csb_org_migration/ccx-migration-204",
145+
"language": "python",
146+
"execution_env": "daytona"
147+
},
148+
{
149+
"task_id": "CCX-crossorg-209",
150+
"benchmark": "csb_org_crossorg",
151+
"task_dir": "csb_org_crossorg/ccx-crossorg-209",
152+
"language": "go",
153+
"execution_env": "daytona"
154+
},
155+
{
156+
"task_id": "CCX-crossorg-217",
157+
"benchmark": "csb_org_crossorg",
158+
"task_dir": "csb_org_crossorg/ccx-crossorg-217",
159+
"language": "python",
160+
"execution_env": "daytona"
161+
},
162+
{
163+
"task_id": "CCX-crossorg-221",
164+
"benchmark": "csb_org_crossorg",
165+
"task_dir": "csb_org_crossorg/ccx-crossorg-221",
166+
"language": "java",
167+
"execution_env": "daytona"
168+
},
169+
{
170+
"task_id": "CCX-dep-trace-254",
171+
"benchmark": "csb_org_crossrepo",
172+
"task_dir": "csb_org_crossrepo/ccx-dep-trace-254",
173+
"language": "cpp",
174+
"execution_env": "daytona"
175+
},
176+
{
177+
"task_id": "CCX-dep-trace-258",
178+
"benchmark": "csb_org_crossrepo",
179+
"task_dir": "csb_org_crossrepo/ccx-dep-trace-258",
180+
"language": "go",
181+
"execution_env": "daytona"
182+
},
183+
{
184+
"task_id": "CCX-dep-trace-268",
185+
"benchmark": "csb_org_crossrepo",
186+
"task_dir": "csb_org_crossrepo/ccx-dep-trace-268",
187+
"language": "rust",
188+
"execution_env": "daytona"
189+
},
190+
{
191+
"task_id": "CCX-dep-trace-271",
192+
"benchmark": "csb_org_crossrepo",
193+
"task_dir": "csb_org_crossrepo/ccx-dep-trace-271",
194+
"language": "java",
195+
"execution_env": "daytona"
196+
},
197+
{
198+
"task_id": "pytorch-relu-gelu-fusion-fix-001",
199+
"benchmark": "csb_sdlc_fix",
200+
"task_dir": "csb_sdlc_fix/pytorch-relu-gelu-fusion-fix-001",
201+
"language": "cpp",
202+
"execution_env": "daytona"
203+
},
204+
{
205+
"task_id": "pytorch-release-210-fix-001",
206+
"benchmark": "csb_sdlc_fix",
207+
"task_dir": "csb_sdlc_fix/pytorch-release-210-fix-001",
208+
"language": "cpp",
209+
"execution_env": "daytona"
210+
},
211+
{
212+
"task_id": "ccx-dep-trace-272",
213+
"benchmark": "csb_org_crossrepo_tracing",
214+
"task_dir": "csb_org_crossrepo_tracing/ccx-dep-trace-272",
215+
"language": "cpp",
216+
"execution_env": "daytona"
217+
},
218+
{
219+
"task_id": "ccx-dep-trace-293",
220+
"benchmark": "csb_org_crossrepo_tracing",
221+
"task_dir": "csb_org_crossrepo_tracing/ccx-dep-trace-293",
222+
"language": "csharp",
223+
"execution_env": "daytona"
224+
}
225+
]
226+
}

docs/ops/SCRIPT_INDEX.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ Generated from `scripts/registry.json` by `scripts/generate_script_index.py`.
172172
- `scripts/audit_unpinned_repos.py` - Utility script for audit unpinned repos.
173173
- `scripts/audit_v2_report_data.py` - Utility script for audit v2 report data.
174174
- `scripts/backfill_instruction_artifacts.py` [one_off] - Historical one-off script: backfill instruction artifacts.
175+
- `scripts/backfill_reviewers.py` [one_off] - Historical one-off script: backfill reviewers.
175176
- `scripts/backfill_size_metadata.py` [one_off] - Historical one-off script: backfill size metadata.
176177
- `scripts/backfill_triage_from_manifest.py` [one_off] - Historical one-off script: backfill triage from manifest.
177178
- `scripts/check_harness_readiness.py` - Utility script for check harness readiness.
@@ -241,6 +242,7 @@ Generated from `scripts/registry.json` by `scripts/generate_script_index.py`.
241242
- `scripts/run_judge.py` - Utility script for run judge.
242243
- `scripts/run_missing_oracles.sh` - Utility script for run missing oracles.
243244
- `scripts/run_scaling_gap_oracles.sh` - Utility script for run scaling gap oracles.
245+
- `scripts/run_sg_validation.py` - Utility script for run sg validation.
244246
- `scripts/scaffold_contextbench_tasks.py` - Utility script for scaffold contextbench tasks.
245247
- `scripts/scaffold_feature_tasks.py` - Utility script for scaffold feature tasks.
246248
- `scripts/scaffold_refactor_tasks.py` - Utility script for scaffold refactor tasks.

scripts/registry.json

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,14 @@
146146
"language": "python",
147147
"summary": "Historical one-off script: backfill instruction artifacts."
148148
},
149+
{
150+
"name": "backfill_reviewers.py",
151+
"path": "scripts/backfill_reviewers.py",
152+
"category": "misc",
153+
"status": "one_off",
154+
"language": "python",
155+
"summary": "Historical one-off script: backfill reviewers."
156+
},
149157
{
150158
"name": "backfill_size_metadata.py",
151159
"path": "scripts/backfill_size_metadata.py",
@@ -1338,6 +1346,14 @@
13381346
"language": "shell",
13391347
"summary": "Utility script for run scaling gap oracles."
13401348
},
1349+
{
1350+
"name": "run_sg_validation.py",
1351+
"path": "scripts/run_sg_validation.py",
1352+
"category": "misc",
1353+
"status": "maintained",
1354+
"language": "python",
1355+
"summary": "Utility script for run sg validation."
1356+
},
13411357
{
13421358
"name": "scaffold_contextbench_tasks.py",
13431359
"path": "scripts/scaffold_contextbench_tasks.py",
@@ -1643,7 +1659,7 @@
16431659
"infra_mirrors": 20,
16441660
"library_helpers": 7,
16451661
"migration": 4,
1646-
"misc": 87,
1662+
"misc": 89,
16471663
"qa_quality": 10,
16481664
"submission_reporting": 7,
16491665
"task_creation_selection": 13,

0 commit comments

Comments
 (0)