1616logger = logging .getLogger (__name__ )
1717
1818
19+ def _page_uri (uri : str ) -> str :
20+ """Return the page part of an objects.inv URI."""
21+ return uri .split ("#" , 1 )[0 ]
22+
23+
def _document_candidates(uri: str) -> tuple[str, ...]:
    """Return possible document slugs for a symbol URI.

    ``objects.inv`` entries use HTML paths such as ``library/json.html``,
    while Sphinx JSON content is ingested with extensionless slugs such as
    ``library/json``. Prefer the exact URI first, then the extensionless form.

    The fragment (``#...``) is stripped before candidates are built. The
    result always contains at least one entry: the page URI itself.
    """
    html_suffix = ".html"
    page_uri = _page_uri(uri)
    if page_uri.endswith(html_suffix):
        # Exact page path first, then the same path without the extension.
        return (page_uri, page_uri[: -len(html_suffix)])
    return (page_uri,)
35+
36+
def _resolve_symbol_location(
    conn: sqlite3.Connection,
    row: sqlite3.Row,
) -> tuple[str, str]:
    """Resolve a symbol row to a get_docs-compatible slug and anchor.

    Resolution order:

    1. If ``row["section_id"]`` is set and that section exists, return the
       slug of its document together with the section's anchor.
    2. Otherwise locate the document: first by ``row["document_id"]``, then
       by matching each candidate from ``_document_candidates`` against
       ``slug`` or ``uri`` within the symbol's doc set.
    3. If no document is found, fall back to the page part of the symbol URI
       and the symbol's own anchor.
    4. If a document is found, keep the symbol's anchor only when a section
       with that anchor exists in the document; otherwise return an empty
       anchor.

    Args:
        conn: Open SQLite connection used for the lookups.
        row: Symbol row; must expose ``uri``, ``anchor``, ``section_id``,
            ``document_id`` and ``doc_set_id``.

    Returns:
        A ``(slug, anchor)`` pair; ``anchor`` is ``""`` when none applies.
    """
    symbol_uri = str(row["uri"])
    symbol_anchor = str(row["anchor"] or "")

    # 1. A direct section link is the most precise location available.
    section_id = row["section_id"]
    if section_id is not None:
        linked_section = conn.execute(
            """
            SELECT d.slug, s.anchor
            FROM sections s
            JOIN documents d ON s.document_id = d.id
            WHERE s.id = ?
            LIMIT 1
            """,
            (section_id,),
        ).fetchone()
        if linked_section is not None:
            return linked_section["slug"], linked_section["anchor"] or ""

    # 2a. Try the explicit document link.
    document = None
    document_id = row["document_id"]
    if document_id is not None:
        document = conn.execute(
            "SELECT id, slug FROM documents WHERE id = ? LIMIT 1",
            (document_id,),
        ).fetchone()

    # 2b. Otherwise guess the document from the URI, in preference order.
    if document is None:
        doc_set_id = row["doc_set_id"]
        # Lazy generator: queries run one at a time and stop at the first hit.
        lookups = (
            conn.execute(
                """
                SELECT id, slug
                FROM documents
                WHERE doc_set_id = ? AND (slug = ? OR uri = ?)
                LIMIT 1
                """,
                (doc_set_id, candidate, candidate),
            ).fetchone()
            for candidate in _document_candidates(symbol_uri)
        )
        document = next(
            (found for found in lookups if found is not None),
            None,
        )

    # 3. Nothing ingested for this symbol: hand back the raw page and anchor.
    if document is None:
        return _page_uri(symbol_uri), symbol_anchor

    # 4. Only keep the anchor if the document really has such a section.
    resolved_anchor = ""
    if symbol_anchor:
        anchored_section = conn.execute(
            """
            SELECT anchor
            FROM sections
            WHERE document_id = ? AND anchor = ?
            LIMIT 1
            """,
            (document["id"], symbol_anchor),
        ).fetchone()
        if anchored_section is not None:
            resolved_anchor = anchored_section["anchor"] or ""

    return document["slug"], resolved_anchor
100+
101+
19102def _normalize_scores (hits : list [SymbolHit ]) -> list [SymbolHit ]:
20103 """Normalize BM25 scores to [0.1, 1.0] range.
21104
@@ -129,7 +212,8 @@ def search_symbols(
129212 try :
130213 cursor = conn .execute (
131214 """
132- SELECT sym.id, sym.qualified_name, sym.symbol_type, sym.uri,
215+ SELECT sym.id, sym.doc_set_id, sym.document_id, sym.section_id,
216+ sym.qualified_name, sym.symbol_type, sym.uri,
133217 sym.anchor, sym.module, d.version,
134218 bm25(symbols_fts, 10.0, 1.0) as score,
135219 snippet(symbols_fts, 0, '**', '**', '...', 32) as snippet_text
@@ -148,19 +232,19 @@ def search_symbols(
148232 logger .warning ("FTS5 query failed for symbols: %r" , match_expr )
149233 return []
150234
151- hits = [
152- SymbolHit (
235+ hits = []
236+ for row in rows :
237+ slug , anchor = _resolve_symbol_location (conn , row )
238+ hits .append (SymbolHit (
153239 uri = row ["uri" ],
154240 title = row ["qualified_name" ],
155241 kind = row ["symbol_type" ] or "symbol" ,
156242 snippet = row ["snippet_text" ] or "" ,
157243 score = row ["score" ],
158244 version = row ["version" ],
159- slug = row ["uri" ].split ("#" )[0 ] if "#" in row ["uri" ] else row ["uri" ],
160- anchor = row ["anchor" ] or "" ,
161- )
162- for row in rows
163- ]
245+ slug = slug ,
246+ anchor = anchor ,
247+ ))
164248
165249 return _normalize_scores (hits )
166250
@@ -251,7 +335,8 @@ def lookup_symbols_exact(
251335
252336 cursor = conn .execute (
253337 """
254- SELECT s.qualified_name, s.symbol_type, s.uri, s.anchor,
338+ SELECT s.doc_set_id, s.document_id, s.section_id,
339+ s.qualified_name, s.symbol_type, s.uri, s.anchor,
255340 s.module, d.version
256341 FROM symbols s
257342 JOIN doc_sets d ON s.doc_set_id = d.id
@@ -264,16 +349,17 @@ def lookup_symbols_exact(
264349 )
265350 rows = cursor .fetchall ()
266351
267- return [
268- SymbolHit (
352+ hits = []
353+ for row in rows :
354+ slug , anchor = _resolve_symbol_location (conn , row )
355+ hits .append (SymbolHit (
269356 uri = row ["uri" ],
270357 title = row ["qualified_name" ],
271358 kind = row ["symbol_type" ] or "symbol" ,
272359 snippet = "" ,
273360 score = 1.0 if row ["qualified_name" ] == query else 0.8 ,
274361 version = row ["version" ],
275- slug = row ["uri" ].split ("#" )[0 ] if "#" in row ["uri" ] else row ["uri" ],
276- anchor = row ["anchor" ] or "" ,
277- )
278- for row in rows
279- ]
362+ slug = slug ,
363+ anchor = anchor ,
364+ ))
365+ return hits
0 commit comments