-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathrunner.py
More file actions
83 lines (73 loc) · 2.66 KB
/
Copy pathrunner.py
File metadata and controls
83 lines (73 loc) · 2.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""Bench-agnostic runner. Bench #0 version — iterates a symbol-lookup dataset
and calls `adapter.query_symbol` per query."""
from __future__ import annotations
import json
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any
from benchmarks.suite.contract import Adapter
from benchmarks.suite.scoring import rank_of_first_hit
@dataclass
class BenchRow:
    """Result record for a single benchmark query.

    `run_bench_0` produces one row per dataset entry, for successful and
    failed queries alike. Rows are converted to plain dicts with
    `dataclasses.asdict` by `stamp_rows` and `rows_to_jsonl`.
    """

    # Value of the dataset entry's "id" key.
    query_id: str
    # Symbol name passed to `adapter.query_symbol`.
    target_symbol: str
    # Value of the dataset entry's "expected_file" key; scored against the
    # paths the adapter returned.
    expected_file: str
    # Number of paths the adapter returned (0 when the query errored).
    paths_count: int
    # First three returned paths, in adapter order (empty when the query errored).
    top_paths: list[str]
    # Result of `rank_of_first_hit(paths, expected_file)`; always None on an
    # errored row. NOTE(review): presumably None also means "expected file not
    # found", and whether ranks are 0- or 1-based is defined in
    # benchmarks.suite.scoring -- confirm there.
    rank: int | None
    # Latency as reported by the adapter result (0.0 when the query errored).
    latency_ms: float
    # Token usage as reported by the adapter result (0 when the query errored).
    tokens: int
    # "<ExceptionType>: <message>" for a failed query, otherwise None.
    error: str | None = None
def run_bench_0(
    adapter: Adapter,
    dataset: list[dict[str, Any]],
    limit: int,
    corpus=None,
) -> list[BenchRow]:
    """Execute every Bench #0 query against a single adapter.

    Lifecycle and error handling:
      - `adapter.setup(corpus)` runs once, before the first query. It sits
        outside the try/finally, so if setup itself raises, teardown is not
        called.
      - `adapter.teardown()` runs once after the loop, including when an
        exception escapes the loop.
      - Any `Exception` raised while querying or scoring a single entry is
        recorded on that entry's row (`error` = "<Type>: <message>") and the
        run continues with the next entry.
      - The "id", "target_symbol" and "expected_file" lookups happen before
        the per-query guard, so a dataset entry missing one of them raises
        `KeyError` to the caller (after teardown).

    Args:
        adapter: Adapter under test.
        dataset: Entries with "id", "target_symbol" and "expected_file" keys.
        limit: Forwarded to `adapter.query_symbol` as the `limit` keyword.
        corpus: Forwarded unchanged to `adapter.setup`.

    Returns:
        One `BenchRow` per dataset entry, in dataset order.
    """
    results: list[BenchRow] = []
    adapter.setup(corpus)
    try:
        for entry in dataset:
            # Fields shared by the success row and the error row.
            identity = {
                "query_id": entry["id"],
                "target_symbol": entry["target_symbol"],
                "expected_file": entry["expected_file"],
            }
            try:
                outcome = adapter.query_symbol(
                    identity["target_symbol"], limit=limit
                )
                hit_rank = rank_of_first_hit(
                    outcome.paths, identity["expected_file"]
                )
                row = BenchRow(
                    **identity,
                    paths_count=len(outcome.paths),
                    top_paths=outcome.paths[:3],
                    rank=hit_rank,
                    latency_ms=outcome.latency_ms,
                    tokens=outcome.tokens_used,
                    error=None,
                )
            except Exception as exc:
                # Deliberately broad: one failing query must not abort the
                # whole benchmark run.
                row = BenchRow(
                    **identity,
                    paths_count=0,
                    top_paths=[],
                    rank=None,
                    latency_ms=0.0,
                    tokens=0,
                    error=f"{type(exc).__name__}: {exc}",
                )
            results.append(row)
    finally:
        adapter.teardown()
    return results
def stamp_rows(rows: list[BenchRow], adapter_name: str) -> list[dict]:
    """Convert rows to plain dicts, each tagged with the producing adapter.

    Every row is expanded with `dataclasses.asdict` and then given an
    "adapter" key set to `adapter_name`. This is meant to be the one place
    where the (`BenchRow` → jsonl dict + `adapter`) conversion is defined.

    Returns a new list in the same order as `rows`; the rows themselves are
    not modified.
    """
    return [{**asdict(row), "adapter": adapter_name} for row in rows]
def rows_to_jsonl(rows: list[BenchRow], dst: Path) -> None:
    """Write `rows` to `dst` as JSON Lines: one JSON object per row.

    Each row is converted with `dataclasses.asdict` and serialized with
    `json.dumps`, followed by a newline. Missing parent directories of `dst`
    are created, and an existing file at `dst` is overwritten. According to
    the original note, the reporting step reads this file back.

    Args:
        rows: Dataclass rows to serialize, written in the given order.
        dst: Destination file path.
    """
    dst.parent.mkdir(parents=True, exist_ok=True)
    # Pin the encoding so the file does not depend on the platform's locale
    # default (and so no EncodingWarning is raised under -X warn_default_encoding).
    with dst.open("w", encoding="utf-8") as out:
        out.writelines(json.dumps(asdict(row)) + "\n" for row in rows)