Skip to content

Commit 3831b1f

Browse files
committed
Support analysis-layout discovery in results exporter
1 parent 8fda6b0 commit 3831b1f

File tree

1 file changed

+100
-42
lines changed

1 file changed

+100
-42
lines changed

scripts/export_official_results.py

Lines changed: 100 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
if str(PROJECT_ROOT / "scripts") not in sys.path:
4141
sys.path.insert(0, str(PROJECT_ROOT / "scripts"))
4242

43-
from config_utils import discover_configs, is_mcp_config
43+
from config_utils import discover_configs, is_config_dir, is_mcp_config
4444
from official_runs import (
4545
top_level_run_dirs,
4646
load_manifest,
@@ -246,6 +246,56 @@ def _iter_task_dirs(config_dir: Path) -> list[Path]:
246246
return task_dirs
247247

248248

249+
def _suite_from_path_parts(parts: tuple[str, ...], prefix_map: dict[str, str]) -> str:
    """Infer a suite name from path components of a non-official layout.

    Resolution happens in two passes over ``parts``:

    1. The first component that starts with ``csb_sdlc_``, ``csb_org_`` or
       ``ccb_mcp_`` is returned unchanged.
    2. Otherwise each component starting with ``ccb_`` or ``csb_`` is handed
       to ``_suite_from_run_dir`` together with ``prefix_map``; the first
       answer other than ``"unknown"`` is returned.

    Args:
        parts: Path components to inspect, in path order.
        prefix_map: Passed through unchanged to ``_suite_from_run_dir``.

    Returns:
        The inferred suite name, or ``"unknown"`` when neither pass matches.
    """
    literal_suite_prefixes = ("csb_sdlc_", "csb_org_", "ccb_mcp_")
    literal_match = next(
        (segment for segment in parts if segment.startswith(literal_suite_prefixes)),
        None,
    )
    if literal_match is not None:
        return literal_match

    for segment in parts:
        if not segment.startswith(("ccb_", "csb_")):
            continue
        resolved = _suite_from_run_dir(segment, prefix_map)
        if resolved != "unknown":
            return resolved

    return "unknown"
260+
261+
262+
def _iter_analysis_layout_tasks(
    runs_dir: Path,
    prefix_map: dict[str, str],
    run_filter: set[str] | None,
) -> list[tuple[str, str, str, Path]]:
    """Discover task results stored in the analysis-style directory layout.

    Expected layout::

        runs/analysis/.../<suite>/<config>/<task>/<trial>/result.json

    Every ``result.json`` below ``runs_dir`` is visited in sorted path order.
    A file is kept only when all of the following hold:

    * no component of its path is listed in ``SKIP_DIR_PARTS``;
    * some component of its path relative to ``runs_dir`` satisfies
      ``is_config_dir``;
    * ``run_filter`` is empty/``None`` or contains the derived run name;
    * its directory has no ``__broken_verifier`` entry;
    * it can be read and parsed as JSON, and the parsed payload satisfies
      ``_is_task_result_payload``.

    Args:
        runs_dir: Root directory searched recursively.
        prefix_map: Forwarded to ``_suite_from_path_parts`` for suite inference.
        run_filter: Optional set of run names to keep. The run name is the
            ``/``-joined relative path components that precede the config
            component, or ``"analysis"`` when the config component comes first.

    Returns:
        ``(suite, run_dir_name, config, task_dir)`` tuples, where ``task_dir``
        is the directory that directly contains the ``result.json``.
    """
    discovered: list[tuple[str, str, str, Path]] = []
    for result_path in sorted(runs_dir.rglob("result.json")):
        if any(part in SKIP_DIR_PARTS for part in result_path.parts):
            continue

        # Apply the path-only filters before touching the file: they are
        # cheap, and they avoid reading and parsing JSON for results that
        # would be discarded anyway.
        rel_parts = result_path.relative_to(runs_dir).parts
        config_idx = next(
            (i for i, part in enumerate(rel_parts) if is_config_dir(part)),
            None,
        )
        if config_idx is None:
            continue

        run_dir_name = "/".join(rel_parts[:config_idx]) if config_idx > 0 else "analysis"
        if run_filter and run_dir_name not in run_filter:
            continue

        task_dir = result_path.parent
        if (task_dir / "__broken_verifier").exists():
            continue

        try:
            # JSON is read as UTF-8 explicitly so the outcome does not depend
            # on the platform's locale encoding.
            payload = json.loads(result_path.read_text(encoding="utf-8"))
        except (OSError, ValueError, RecursionError):
            # Best-effort discovery: skip unreadable files (OSError), bad
            # encoding or malformed JSON (both ValueError subclasses), and
            # pathologically nested documents (RecursionError). Other
            # exceptions indicate a programming error and are not swallowed.
            continue
        if not _is_task_result_payload(payload):
            continue

        config = rel_parts[config_idx]
        suite = _suite_from_path_parts(rel_parts, prefix_map)
        discovered.append((suite, run_dir_name, config, task_dir))
    return discovered
297+
298+
249299
def _extract_reward_and_status(result_payload: dict[str, Any]) -> tuple[float | None, str]:
250300
exception_info = result_payload.get("exception_info")
251301
verifier = result_payload.get("verifier_result") or {}
@@ -1745,53 +1795,61 @@ def build_export(
17451795
audits_dir = output_dir / "audits"
17461796
traces_dir = output_dir / "traces"
17471797

1798+
def _append_task_record(suite: str, run_dir_name: str, config: str, task_dir: Path) -> None:
    """Export one task: bundle its traces, write its pages, and record it.

    Relies on free names resolved from the surrounding scope
    (``max_examples``, ``traces_dir``, ``audits_dir``, ``tasks_dir``,
    ``output_dir``, ``repo_blob_base``, ``tasks_out``).

    Steps:

    1. Extract the task record using the suite-normalized config; return
       without side effects when extraction yields ``None``.
    2. Copy the trajectory (always stored as ``trajectory.json``) and the
       transcript (stored under its original file name) into
       ``traces/<slug>/`` when the recorded source path is an existing file.
    3. Write ``audits/<slug>.json`` and ``tasks/<slug>.html``.
    4. Append the task's dict form to ``tasks_out``.

    Args:
        suite: Suite name for the task.
        run_dir_name: Run identifier; part of the output slug.
        config: Raw config directory name. The slug uses this raw value,
            while extraction uses the normalized one.
        task_dir: Directory handed to ``_extract_task_record``.
    """
    outcome = _extract_task_record(
        suite,
        run_dir_name,
        _normalize_config_for_suite(suite, config),
        task_dir,
        max_examples,
    )
    if outcome is None:
        return
    record, audit_payload = outcome

    task_slug = _slug(f"{run_dir_name}--{config}--{record.task_name}")

    # Both trace kinds follow the same copy procedure; only the destination
    # file name differs (fixed for the trajectory, preserved for the transcript).
    bundled: dict[str, str | None] = {"trajectory": None, "transcript": None}
    for kind in ("trajectory", "transcript"):
        recorded_path = record.trace_paths.get(kind)
        if not recorded_path:
            continue
        source = PROJECT_ROOT / recorded_path
        if not source.is_file():
            continue
        bundled_name = "trajectory.json" if kind == "trajectory" else source.name
        target = traces_dir / task_slug / bundled_name
        target.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(source, target)
        bundled[kind] = f"traces/{task_slug}/{bundled_name}"
    record.bundled_trace_paths = bundled

    # The audit page reference must be set on the record before the task
    # page is rendered from it.
    record.audit_page = f"audits/{task_slug}.json"
    audit_json = json.dumps(audit_payload, indent=2, sort_keys=True)
    _write_text(audits_dir / f"{task_slug}.json", audit_json)

    task_page_rel = f"tasks/{task_slug}.html"
    _write_text(tasks_dir / f"{task_slug}.html", _build_task_page(record))

    tasks_out.append(
        _to_task_dict(
            record,
            task_page_rel,
            output_dir,
            repo_blob_base,
        )
    )
1840+
17481841
for run_dir in run_dirs:
17491842
suite = _suite_from_run_dir(run_dir.name, prefix_map)
17501843
configs = discover_configs(run_dir)
17511844
for config in configs:
1752-
normalized_config = _normalize_config_for_suite(suite, config)
17531845
config_dir = run_dir / config
17541846
for task_dir in _iter_task_dirs(config_dir):
1755-
extracted = _extract_task_record(suite, run_dir.name, normalized_config, task_dir, max_examples)
1756-
if extracted is None:
1757-
continue
1758-
record, audit_payload = extracted
1759-
1760-
task_slug = _slug(f"{run_dir.name}--{config}--{record.task_name}")
1761-
bundled_trace_paths: dict[str, str | None] = {"trajectory": None, "transcript": None}
1762-
if record.trace_paths.get("trajectory"):
1763-
src = PROJECT_ROOT / record.trace_paths["trajectory"]
1764-
if src.is_file():
1765-
rel = f"traces/{task_slug}/trajectory.json"
1766-
dst = traces_dir / task_slug / "trajectory.json"
1767-
dst.parent.mkdir(parents=True, exist_ok=True)
1768-
shutil.copy2(src, dst)
1769-
bundled_trace_paths["trajectory"] = rel
1770-
if record.trace_paths.get("transcript"):
1771-
src_tx = PROJECT_ROOT / record.trace_paths["transcript"]
1772-
if src_tx.is_file():
1773-
tx_name = src_tx.name
1774-
rel_tx = f"traces/{task_slug}/{tx_name}"
1775-
dst_tx = traces_dir / task_slug / tx_name
1776-
dst_tx.parent.mkdir(parents=True, exist_ok=True)
1777-
shutil.copy2(src_tx, dst_tx)
1778-
bundled_trace_paths["transcript"] = rel_tx
1779-
record.bundled_trace_paths = bundled_trace_paths
1780-
1781-
audit_page_rel = f"audits/{task_slug}.json"
1782-
record.audit_page = audit_page_rel
1783-
_write_text(audits_dir / f"{task_slug}.json", json.dumps(audit_payload, indent=2, sort_keys=True))
1784-
task_page_rel = f"tasks/{task_slug}.html"
1785-
task_page_path = tasks_dir / f"{task_slug}.html"
1786-
_write_text(task_page_path, _build_task_page(record))
1787-
1788-
task_dict = _to_task_dict(
1789-
record,
1790-
task_page_rel,
1791-
output_dir,
1792-
repo_blob_base,
1793-
)
1794-
tasks_out.append(task_dict)
1847+
_append_task_record(suite, run_dir.name, config, task_dir)
1848+
1849+
# Fallback: analysis layout does not have official run-dir/config structure.
1850+
if not tasks_out:
1851+
for suite, run_dir_name, config, task_dir in _iter_analysis_layout_tasks(runs_dir, prefix_map, run_filter):
1852+
_append_task_record(suite, run_dir_name, config, task_dir)
17951853

17961854
run_summaries: list[dict[str, Any]] = []
17971855
suite_summaries: list[dict[str, Any]] = []

0 commit comments

Comments
 (0)