@@ -14,12 +14,13 @@ permissions:
1414jobs :
1515 selected-cbm-score :
1616 runs-on : ubuntu-latest
17- timeout-minutes : 30
17+ timeout-minutes : 20
1818 env :
1919 ROOT : /tmp/contextbench-selected-cbm-score
2020 TASK_PAYLOADS : /tmp/contextbench-selected-cbm-score/task-payloads.json
2121 CHECKOUT_ROOT : /tmp/contextbench-checkouts
2222 OFFICIAL_CONTEXTBENCH : /tmp/contextbench-selected-cbm-score/ContextBench-official
23+ TARGET_TASK_ID : SWE-Bench-Pro__go__maintenance__bugfix__4df06349
2324 steps :
2425 - uses : actions/checkout@v4
2526 - uses : pnpm/action-setup@v2
3233 - uses : actions/setup-python@v5
3334 with :
3435 python-version : ' 3.11'
35- - name : Install and materialize Go task quietly
36+ - name : Install and materialize selected Go task quietly
3637 shell : bash
3738 run : |
3839 set -euo pipefail
4142 python -m pip install "tree-sitter==0.20.4" "tree-sitter-languages==1.10.2" datasets pyarrow > "$ROOT/logs/pip-install.log" 2>&1
4243 git clone --depth 1 https://github.com/EuniAI/ContextBench.git "$OFFICIAL_CONTEXTBENCH" > "$ROOT/logs/contextbench-clone.log" 2>&1
4344 node scripts/contextbench-runner.mjs --validate-fixtures > "$ROOT/logs/validate-fixtures.log" 2>&1
44- node scripts/contextbench-select-slice.mjs --write-task-payloads --out "$TASK_PAYLOADS" --checkout-root "$CHECKOUT_ROOT" > "$ROOT/logs/write-payloads.log" 2>&1
45- node scripts/contextbench-select-slice.mjs --materialize-checkouts --payloads "$TASK_PAYLOADS" --max-tasks 3 > "$ROOT/logs/materialize.log" 2>&1
45+ node scripts/contextbench-select-slice.mjs --write-task-payloads --out "$TASK_PAYLOADS.all" --checkout-root "$CHECKOUT_ROOT" > "$ROOT/logs/write-payloads.log" 2>&1
46+ node - <<'NODE'
// Narrow the full task-payload file down to the single target task.
//
// Inputs (environment):
//   TASK_PAYLOADS   - path of the single-task payload file to write; the full
//                     payload is read from the same path with ".all" appended.
//   TARGET_TASK_ID  - `instance_id` of the task to keep.
//
// Output: TASK_PAYLOADS is (over)written with the original payload object,
// except that `tasks` holds only the matching task and `task_count` is 1.
// Every failure throws, so the node process ends with a non-zero status.
const fs = require('node:fs');

const payloadPath = process.env.TASK_PAYLOADS;
const target = process.env.TARGET_TASK_ID;

// Fail early with a precise message rather than trying to read "undefined.all"
// or reporting that task "undefined" was not found.
if (!payloadPath) throw new Error('TASK_PAYLOADS is not set');
if (!target) throw new Error('TARGET_TASK_ID is not set');

const payload = JSON.parse(fs.readFileSync(`${payloadPath}.all`, 'utf8'));

// Without this guard a payload lacking a `tasks` array surfaces as an opaque
// "Cannot read properties of undefined" TypeError from `.find`.
if (!Array.isArray(payload?.tasks)) {
  throw new Error(`${payloadPath}.all does not contain a tasks array`);
}

// First task whose instance_id equals the target; null/undefined entries are skipped.
const task = payload.tasks.find((candidate) => candidate?.instance_id === target);
if (!task) throw new Error(`target task ${target} not found`);

// Spread keeps every other top-level payload field; only `task_count` and
// `tasks` are overridden. Output is 2-space-indented JSON plus a trailing newline.
fs.writeFileSync(payloadPath, `${JSON.stringify({ ...payload, task_count: 1, tasks: [task] }, null, 2)}\n`);
54+ NODE
55+ node scripts/contextbench-select-slice.mjs --materialize-checkouts --payloads "$TASK_PAYLOADS" --max-tasks 1 > "$ROOT/logs/materialize.log" 2>&1
4656 echo "selected_score_setup_completed"
4757 - name : Score selected gpt-5.4-mini-high prediction
4858 shell : bash
0 commit comments