11"""Export all evaluation ground truth (GT) datasets to a single Excel file.
22
33Each query file under eval/data/queries/ becomes a sheet in the workbook,
4- with one row per query and columns for query text, GT doc IDs, type, etc.
4+ with one row per query including:
5+
6+ - query text, type, level, category
7+ - relevant_docs (GT IDs)
8+ - relevant_answer (resolved to human-readable title + content snippet)
9+
10+ The GT ID → answer resolution uses the corresponding graph sqlite file,
11+ so you can see the actual answer text alongside the cryptic doc ID.
512
613Usage::
714
1219from __future__ import annotations
1320
1421import json
22+ import sqlite3
1523from pathlib import Path
1624
1725from openpyxl import Workbook
1826from openpyxl .styles import Alignment , Font , PatternFill
19- from openpyxl .utils import get_column_letter
2027
# Repo layout: this script lives two directory levels below the repo root.
REPO_ROOT = Path(__file__).resolve().parents[2]
QUERIES_DIR = REPO_ROOT / "eval" / "data" / "queries"
OUTPUT_PATH = REPO_ROOT / "eval" / "data" / "gt_datasets.xlsx"

# Map query-file name -> graph sqlite file for resolving GT IDs to text.
# Several query variants ("*_hard", "*_multihop", ...) share one graph.
GRAPH_MAP = {
    "krra": "krra_graph.sqlite",
    "krra_hard": "krra_graph.sqlite",
    "krra_graph": "krra_graph.sqlite",
    "krra_multihop": "krra_graph.sqlite",
    "assort": "assort_graph.sqlite",
    "assort_hard": "assort_graph.sqlite",
    "x2bee": "x2bee_graph.sqlite",
    "x2bee_hard": "x2bee_graph.sqlite",
}
43+
44+
45+ def _load_resolver (graph_path : Path ) -> dict [str , str ]:
46+ """Build a map from GT-style ID to human-readable answer.
47+
48+ Keys cover both:
49+ - node title (for structured data like "products:12800000")
50+ - properties.doc_id (for document data with hash IDs)
51+ """
52+ if not graph_path .exists ():
53+ return {}
54+
55+ resolver : dict [str , str ] = {}
56+ try :
57+ conn = sqlite3 .connect (str (graph_path ))
58+ conn .row_factory = sqlite3 .Row
59+ rows = conn .execute (
60+ "SELECT title, content, properties_json FROM syn_nodes"
61+ ).fetchall ()
62+ for r in rows :
63+ title = r ["title" ] or ""
64+ content = (r ["content" ] or "" )[:120 ].replace ("\n " , " " ).strip ()
65+ summary = f"{ title } ▸ { content } " if content else title
66+
67+ # Key by title (structured: "products:12800000")
68+ if title :
69+ resolver [title ] = summary
70+
71+ # Key by properties.doc_id (documents: "0346542e...")
72+ try :
73+ props = json .loads (r ["properties_json" ] or "{}" )
74+ except json .JSONDecodeError :
75+ props = {}
76+ did = props .get ("doc_id" , "" )
77+ if did :
78+ resolver [str (did )] = summary
79+
80+ conn .close ()
81+ except Exception as exc :
82+ print (f" ⚠ resolver failed for { graph_path .name } : { exc } " )
83+
84+ return resolver
85+
2586
# Shared header styling: solid dark-blue fill with bold white 11pt text.
HEADER_FILL = PatternFill(patternType="solid", fgColor="2F5496")
HEADER_FONT = Font(bold=True, size=11, color="FFFFFF")
2889
2990
30- def _flatten_query (q : dict ) -> dict :
31- """Normalize a query dict to a flat set of columns."""
91+ def _flatten_query (q : dict , resolver : dict [str , str ] | None = None ) -> dict :
92+ """Normalize a query dict to a flat set of columns.
93+
94+ When ``resolver`` is provided, the `relevant_answer` column is
95+ populated with human-readable title+content for each GT ID.
96+ """
3297 relevant = q .get ("relevant_docs" ) or q .get ("answer_ids" ) or []
3398 if isinstance (relevant , dict ):
3499 relevant = list (relevant .keys ())
100+
101+ # Resolve GT IDs to readable answers (title + content snippet)
102+ resolved_lines : list [str ] = []
103+ if resolver is not None :
104+ for rid in relevant [:20 ]: # cap at 20 for readability
105+ key = str (rid )
106+ answer = resolver .get (key , "" )
107+ if not answer :
108+ # Try stripping chunk suffix ("#1", "#2") for doc lookup
109+ base = key .rsplit (" #" , 1 )[0 ]
110+ answer = resolver .get (base , "" )
111+ if answer :
112+ resolved_lines .append (f"[{ key } ] { answer } " )
113+ else :
114+ resolved_lines .append (f"[{ key } ] (not found in graph)" )
115+ if len (relevant ) > 20 :
116+ resolved_lines .append (f"... +{ len (relevant ) - 20 } more" )
117+
35118 return {
36119 "qid" : q .get ("qid" , q .get ("query_id" , "" )),
37120 "query" : q .get ("query" , q .get ("question" , "" )),
@@ -40,11 +123,18 @@ def _flatten_query(q: dict) -> dict:
40123 "category" : q .get ("category" , "" ),
41124 "description" : q .get ("description" , "" ),
42125 "relevant_count" : len (relevant ),
126+ "relevant_answer" : "\n " .join (resolved_lines ),
43127 "relevant_docs" : "\n " .join (str (x ) for x in relevant ),
44128 }
45129
46130
47- def _write_sheet (wb : Workbook , name : str , meta : dict , queries : list [dict ]) -> None :
131+ def _write_sheet (
132+ wb : Workbook ,
133+ name : str ,
134+ meta : dict ,
135+ queries : list [dict ],
136+ resolver : dict [str , str ] | None = None ,
137+ ) -> None :
48138 ws = wb .create_sheet (title = name [:31 ]) # Excel sheet name limit
49139
50140 # Metadata header (first rows)
@@ -56,14 +146,16 @@ def _write_sheet(wb: Workbook, name: str, meta: dict, queries: list[dict]) -> No
56146 ws ["B3" ] = meta .get ("id_field" , "doc_id" )
57147 ws ["A4" ] = "Total queries"
58148 ws ["B4" ] = len (queries )
149+ ws ["A5" ] = "Answer resolved?"
150+ ws ["B5" ] = "YES — see relevant_answer column" if resolver else "NO graph found"
59151
60- for row in range (1 , 5 ):
152+ for row in range (1 , 6 ):
61153 ws [f"A{ row } " ].font = Font (bold = True )
62154
63155 # Column headers
64156 columns = ["qid" , "query" , "type" , "level" , "category" , "description" ,
65- "relevant_count" , "relevant_docs" ]
66- header_row = 6
157+ "relevant_count" , "relevant_answer" , " relevant_docs" ]
158+ header_row = 7
67159 for i , col in enumerate (columns , start = 1 ):
68160 cell = ws .cell (row = header_row , column = i , value = col )
69161 cell .fill = HEADER_FILL
@@ -72,16 +164,23 @@ def _write_sheet(wb: Workbook, name: str, meta: dict, queries: list[dict]) -> No
72164
73165 # Data rows
74166 for r , q in enumerate (queries , start = header_row + 1 ):
75- flat = _flatten_query (q )
167+ flat = _flatten_query (q , resolver = resolver )
76168 for c , col in enumerate (columns , start = 1 ):
77169 cell = ws .cell (row = r , column = c , value = flat .get (col , "" ))
78170 cell .alignment = Alignment (vertical = "top" , wrap_text = True )
79171
80- # Column widths
81- widths = {"A" : 8 , "B" : 45 , "C" : 18 , "D" : 8 , "E" : 22 , "F" : 42 , "G" : 14 , "H" : 60 }
172+ # Column widths — wider for answer column
173+ widths = {
174+ "A" : 8 , "B" : 45 , "C" : 18 , "D" : 8 , "E" : 22 , "F" : 42 ,
175+ "G" : 14 , "H" : 70 , "I" : 45 ,
176+ }
82177 for col , w in widths .items ():
83178 ws .column_dimensions [col ].width = w
84179
180+ # Row heights (to accommodate wrap)
181+ for r in range (header_row + 1 , header_row + 1 + len (queries )):
182+ ws .row_dimensions [r ].height = 80
183+
85184 # Freeze header
86185 ws .freeze_panes = f"A{ header_row + 1 } "
87186
@@ -132,6 +231,9 @@ def main() -> None:
132231 if default is not None :
133232 wb .remove (default )
134233
234+ # Pre-load resolvers for each unique graph (avoid re-loading per sheet)
235+ resolver_cache : dict [str , dict [str , str ]] = {}
236+
135237 files = sorted (QUERIES_DIR .glob ("*.json" ))
136238 stats : list [dict ] = []
137239
@@ -154,15 +256,26 @@ def main() -> None:
154256 "id_field" : data .get ("id_field" , "doc_id" ),
155257 }
156258
157- _write_sheet (wb , name , meta , queries )
259+ # Load resolver for this dataset's graph
260+ resolver : dict [str , str ] | None = None
261+ graph_file = GRAPH_MAP .get (name )
262+ if graph_file :
263+ if graph_file not in resolver_cache :
264+ graph_path = REPO_ROOT / "eval" / "data" / graph_file
265+ resolver_cache [graph_file ] = _load_resolver (graph_path )
266+ print (f" 📖 loaded { graph_file } : { len (resolver_cache [graph_file ])} entries" )
267+ resolver = resolver_cache [graph_file ]
268+
269+ _write_sheet (wb , name , meta , queries , resolver = resolver )
158270 stats .append ({
159271 "dataset" : name ,
160272 "description" : meta ["description" ][:100 ],
161273 "queries" : len (queries ),
162274 "id_field" : meta ["id_field" ],
163275 "language" : _guess_language (name , queries ),
164276 })
165- print (f" ✓ { name } : { len (queries )} queries" )
277+ resolved = " (with answers)" if resolver else ""
278+ print (f" ✓ { name } : { len (queries )} queries{ resolved } " )
166279
167280 _write_summary (wb , stats )
168281
0 commit comments