Skip to content

Commit 5950bb2

Browse files
committed
Materialize only selected ContextBench score task
1 parent f30797f commit 5950bb2

1 file changed

Lines changed: 14 additions & 4 deletions

File tree

.github/workflows/contextbench-selected-cbm-score.yml

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -14,12 +14,13 @@ permissions:
1414
jobs:
1515
selected-cbm-score:
1616
runs-on: ubuntu-latest
17-
timeout-minutes: 30
17+
timeout-minutes: 20
1818
env:
1919
ROOT: /tmp/contextbench-selected-cbm-score
2020
TASK_PAYLOADS: /tmp/contextbench-selected-cbm-score/task-payloads.json
2121
CHECKOUT_ROOT: /tmp/contextbench-checkouts
2222
OFFICIAL_CONTEXTBENCH: /tmp/contextbench-selected-cbm-score/ContextBench-official
23+
TARGET_TASK_ID: SWE-Bench-Pro__go__maintenance__bugfix__4df06349
2324
steps:
2425
- uses: actions/checkout@v4
2526
- uses: pnpm/action-setup@v2
@@ -32,7 +33,7 @@ jobs:
3233
- uses: actions/setup-python@v5
3334
with:
3435
python-version: '3.11'
35-
- name: Install and materialize Go task quietly
36+
- name: Install and materialize selected Go task quietly
3637
shell: bash
3738
run: |
3839
set -euo pipefail
@@ -41,8 +42,17 @@ jobs:
4142
python -m pip install "tree-sitter==0.20.4" "tree-sitter-languages==1.10.2" datasets pyarrow > "$ROOT/logs/pip-install.log" 2>&1
4243
git clone --depth 1 https://github.com/EuniAI/ContextBench.git "$OFFICIAL_CONTEXTBENCH" > "$ROOT/logs/contextbench-clone.log" 2>&1
4344
node scripts/contextbench-runner.mjs --validate-fixtures > "$ROOT/logs/validate-fixtures.log" 2>&1
44-
node scripts/contextbench-select-slice.mjs --write-task-payloads --out "$TASK_PAYLOADS" --checkout-root "$CHECKOUT_ROOT" > "$ROOT/logs/write-payloads.log" 2>&1
45-
node scripts/contextbench-select-slice.mjs --materialize-checkouts --payloads "$TASK_PAYLOADS" --max-tasks 3 > "$ROOT/logs/materialize.log" 2>&1
45+
node scripts/contextbench-select-slice.mjs --write-task-payloads --out "$TASK_PAYLOADS.all" --checkout-root "$CHECKOUT_ROOT" > "$ROOT/logs/write-payloads.log" 2>&1
46+
node - <<'NODE'
47+
const fs = require('node:fs');
48+
const payloadPath = process.env.TASK_PAYLOADS;
49+
const target = process.env.TARGET_TASK_ID;
50+
const payload = JSON.parse(fs.readFileSync(`${payloadPath}.all`, 'utf8'));
51+
const task = payload.tasks.find((candidate) => candidate.instance_id === target);
52+
if (!task) throw new Error(`target task ${target} not found`);
53+
fs.writeFileSync(payloadPath, `${JSON.stringify({ ...payload, task_count: 1, tasks: [task] }, null, 2)}\n`);
54+
NODE
55+
node scripts/contextbench-select-slice.mjs --materialize-checkouts --payloads "$TASK_PAYLOADS" --max-tasks 1 > "$ROOT/logs/materialize.log" 2>&1
4656
echo "selected_score_setup_completed"
4757
- name: Score selected gpt-5.4-mini-high prediction
4858
shell: bash

0 commit comments

Comments
 (0)