|
40 | 40 | if str(PROJECT_ROOT / "scripts") not in sys.path: |
41 | 41 | sys.path.insert(0, str(PROJECT_ROOT / "scripts")) |
42 | 42 |
|
43 | | -from config_utils import discover_configs, is_mcp_config |
| 43 | +from config_utils import discover_configs, is_config_dir, is_mcp_config |
44 | 44 | from official_runs import ( |
45 | 45 | top_level_run_dirs, |
46 | 46 | load_manifest, |
@@ -246,6 +246,56 @@ def _iter_task_dirs(config_dir: Path) -> list[Path]: |
246 | 246 | return task_dirs |
247 | 247 |
|
248 | 248 |
|
| 249 | +def _suite_from_path_parts(parts: tuple[str, ...], prefix_map: dict[str, str]) -> str: |
| 250 | + """Best-effort suite inference for non-official directory layouts.""" |
| 251 | + for part in parts: |
| 252 | + if part.startswith(("csb_sdlc_", "csb_org_", "ccb_mcp_")): |
| 253 | + return part |
| 254 | + for part in parts: |
| 255 | + if part.startswith(("ccb_", "csb_")): |
| 256 | + suite = _suite_from_run_dir(part, prefix_map) |
| 257 | + if suite != "unknown": |
| 258 | + return suite |
| 259 | + return "unknown" |
| 260 | + |
| 261 | + |
def _iter_analysis_layout_tasks(
    runs_dir: Path,
    prefix_map: dict[str, str],
    run_filter: set[str] | None,
) -> list[tuple[str, str, str, Path]]:
    """Discover tasks in analysis-style layout:
    runs/analysis/.../<suite>/<config>/<task>/<trial>/result.json
    """
    found: list[tuple[str, str, str, Path]] = []
    for result_path in sorted(runs_dir.rglob("result.json")):
        # Ignore results living under directories we never scan.
        if any(piece in SKIP_DIR_PARTS for piece in result_path.parts):
            continue
        trial_dir = result_path.parent
        # Marker file flags trials whose verifier is known to be broken.
        if (trial_dir / "__broken_verifier").exists():
            continue
        try:
            payload = json.loads(result_path.read_text())
        except Exception:
            # Unreadable or corrupt result files are silently skipped.
            continue
        if not _is_task_result_payload(payload):
            continue

        rel_parts = result_path.relative_to(runs_dir).parts
        # Locate the first path component that looks like a config directory.
        config_idx = None
        for idx, piece in enumerate(rel_parts):
            if is_config_dir(piece):
                config_idx = idx
                break
        if config_idx is None:
            continue

        config = rel_parts[config_idx]
        run_dir_name = "/".join(rel_parts[:config_idx]) if config_idx else "analysis"
        if run_filter and run_dir_name not in run_filter:
            continue

        suite = _suite_from_path_parts(rel_parts, prefix_map)
        found.append((suite, run_dir_name, config, trial_dir))
    return found
| 297 | + |
| 298 | + |
249 | 299 | def _extract_reward_and_status(result_payload: dict[str, Any]) -> tuple[float | None, str]: |
250 | 300 | exception_info = result_payload.get("exception_info") |
251 | 301 | verifier = result_payload.get("verifier_result") or {} |
@@ -1745,53 +1795,61 @@ def build_export( |
1745 | 1795 | audits_dir = output_dir / "audits" |
1746 | 1796 | traces_dir = output_dir / "traces" |
1747 | 1797 |
|
def _append_task_record(suite: str, run_dir_name: str, config: str, task_dir: Path) -> None:
    """Extract one task record, bundle its traces and audit JSON, and emit its pages."""
    normalized_config = _normalize_config_for_suite(suite, config)
    extracted = _extract_task_record(suite, run_dir_name, normalized_config, task_dir, max_examples)
    if extracted is None:
        return
    record, audit_payload = extracted

    task_slug = _slug(f"{run_dir_name}--{config}--{record.task_name}")

    # Copy any available trace files into the export's traces/ subtree.
    bundled: dict[str, str | None] = {"trajectory": None, "transcript": None}
    if record.trace_paths.get("trajectory"):
        source = PROJECT_ROOT / record.trace_paths["trajectory"]
        if source.is_file():
            destination = traces_dir / task_slug / "trajectory.json"
            destination.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(source, destination)
            bundled["trajectory"] = f"traces/{task_slug}/trajectory.json"
    if record.trace_paths.get("transcript"):
        source_tx = PROJECT_ROOT / record.trace_paths["transcript"]
        if source_tx.is_file():
            # Transcripts keep their original filename (extension may vary).
            destination_tx = traces_dir / task_slug / source_tx.name
            destination_tx.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy2(source_tx, destination_tx)
            bundled["transcript"] = f"traces/{task_slug}/{source_tx.name}"
    record.bundled_trace_paths = bundled

    # Write the per-task audit payload and HTML page, then record the summary row.
    record.audit_page = f"audits/{task_slug}.json"
    _write_text(audits_dir / f"{task_slug}.json", json.dumps(audit_payload, indent=2, sort_keys=True))
    task_page_rel = f"tasks/{task_slug}.html"
    _write_text(tasks_dir / f"{task_slug}.html", _build_task_page(record))

    tasks_out.append(_to_task_dict(record, task_page_rel, output_dir, repo_blob_base))
1748 | 1841 | for run_dir in run_dirs: |
1749 | 1842 | suite = _suite_from_run_dir(run_dir.name, prefix_map) |
1750 | 1843 | configs = discover_configs(run_dir) |
1751 | 1844 | for config in configs: |
1752 | | - normalized_config = _normalize_config_for_suite(suite, config) |
1753 | 1845 | config_dir = run_dir / config |
1754 | 1846 | for task_dir in _iter_task_dirs(config_dir): |
1755 | | - extracted = _extract_task_record(suite, run_dir.name, normalized_config, task_dir, max_examples) |
1756 | | - if extracted is None: |
1757 | | - continue |
1758 | | - record, audit_payload = extracted |
1759 | | - |
1760 | | - task_slug = _slug(f"{run_dir.name}--{config}--{record.task_name}") |
1761 | | - bundled_trace_paths: dict[str, str | None] = {"trajectory": None, "transcript": None} |
1762 | | - if record.trace_paths.get("trajectory"): |
1763 | | - src = PROJECT_ROOT / record.trace_paths["trajectory"] |
1764 | | - if src.is_file(): |
1765 | | - rel = f"traces/{task_slug}/trajectory.json" |
1766 | | - dst = traces_dir / task_slug / "trajectory.json" |
1767 | | - dst.parent.mkdir(parents=True, exist_ok=True) |
1768 | | - shutil.copy2(src, dst) |
1769 | | - bundled_trace_paths["trajectory"] = rel |
1770 | | - if record.trace_paths.get("transcript"): |
1771 | | - src_tx = PROJECT_ROOT / record.trace_paths["transcript"] |
1772 | | - if src_tx.is_file(): |
1773 | | - tx_name = src_tx.name |
1774 | | - rel_tx = f"traces/{task_slug}/{tx_name}" |
1775 | | - dst_tx = traces_dir / task_slug / tx_name |
1776 | | - dst_tx.parent.mkdir(parents=True, exist_ok=True) |
1777 | | - shutil.copy2(src_tx, dst_tx) |
1778 | | - bundled_trace_paths["transcript"] = rel_tx |
1779 | | - record.bundled_trace_paths = bundled_trace_paths |
1780 | | - |
1781 | | - audit_page_rel = f"audits/{task_slug}.json" |
1782 | | - record.audit_page = audit_page_rel |
1783 | | - _write_text(audits_dir / f"{task_slug}.json", json.dumps(audit_payload, indent=2, sort_keys=True)) |
1784 | | - task_page_rel = f"tasks/{task_slug}.html" |
1785 | | - task_page_path = tasks_dir / f"{task_slug}.html" |
1786 | | - _write_text(task_page_path, _build_task_page(record)) |
1787 | | - |
1788 | | - task_dict = _to_task_dict( |
1789 | | - record, |
1790 | | - task_page_rel, |
1791 | | - output_dir, |
1792 | | - repo_blob_base, |
1793 | | - ) |
1794 | | - tasks_out.append(task_dict) |
| 1847 | + _append_task_record(suite, run_dir.name, config, task_dir) |
| 1848 | + |
| 1849 | + # Fallback: analysis layout does not have official run-dir/config structure. |
| 1850 | + if not tasks_out: |
| 1851 | + for suite, run_dir_name, config, task_dir in _iter_analysis_layout_tasks(runs_dir, prefix_map, run_filter): |
| 1852 | + _append_task_record(suite, run_dir_name, config, task_dir) |
1795 | 1853 |
|
1796 | 1854 | run_summaries: list[dict[str, Any]] = [] |
1797 | 1855 | suite_summaries: list[dict[str, Any]] = [] |
|
0 commit comments