Skip to content

Commit c2571ac

Browse files
committed
fix: return retrievable slugs for symbol hits
1 parent d339d27 commit c2571ac

2 files changed

Lines changed: 137 additions & 16 deletions

File tree

src/mcp_server_python_docs/retrieval/ranker.py

Lines changed: 102 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,89 @@
1616
logger = logging.getLogger(__name__)
1717

1818

19+
def _page_uri(uri: str) -> str:
20+
"""Return the page part of an objects.inv URI."""
21+
return uri.split("#", 1)[0]
22+
23+
24+
def _document_candidates(uri: str) -> tuple[str, ...]:
    """List the document slugs a symbol URI may refer to, best match first.

    ``objects.inv`` entries point at HTML paths such as ``library/json.html``,
    whereas Sphinx JSON content is ingested under extensionless slugs such as
    ``library/json``. The fragment is dropped, the page path itself is always
    the first candidate, and the path without ``.html`` follows when the
    page path carries that suffix.
    """
    html_suffix = ".html"
    page = _page_uri(uri)
    if not page.endswith(html_suffix):
        return (page,)
    return (page, page[: -len(html_suffix)])
35+
36+
37+
def _resolve_symbol_location(
    conn: sqlite3.Connection,
    row: sqlite3.Row,
) -> tuple[str, str]:
    """Map a symbol row to a ``(slug, anchor)`` pair that get_docs can fetch.

    Lookup order:

    1. ``row["section_id"]``: the linked section's document slug and anchor.
    2. ``row["document_id"]``: the linked document's slug.
    3. Otherwise the first document in the symbol's doc set whose ``slug`` or
       ``uri`` equals one of the ``_document_candidates`` of the symbol URI.

    In cases 2 and 3 the symbol's own anchor is kept only when the document
    has a section with that exact anchor; otherwise the anchor is ``""`` so
    the caller addresses the whole page. When no document can be found, the
    page part of the symbol URI and the symbol's anchor are returned as-is.

    ``row`` must provide ``uri``, ``anchor``, ``section_id``, ``document_id``
    and ``doc_set_id``; rows returned by ``conn`` are indexed by column name.
    """
    symbol_uri = str(row["uri"])
    symbol_anchor = str(row["anchor"] or "")

    # 1. A direct section link is the most precise location available.
    linked_section_id = row["section_id"]
    if linked_section_id is not None:
        section = conn.execute(
            """
            SELECT d.slug, s.anchor
            FROM sections s
            JOIN documents d ON s.document_id = d.id
            WHERE s.id = ?
            LIMIT 1
            """,
            (linked_section_id,),
        ).fetchone()
        if section is not None:
            return section["slug"], section["anchor"] or ""

    # 2. Fall back to the linked document, if the symbol has one.
    document = None
    linked_document_id = row["document_id"]
    if linked_document_id is not None:
        document = conn.execute(
            "SELECT id, slug FROM documents WHERE id = ? LIMIT 1",
            (linked_document_id,),
        ).fetchone()

    # 3. Otherwise match by URI: exact page path first, then extensionless.
    if document is None:
        for candidate in _document_candidates(symbol_uri):
            document = conn.execute(
                """
                SELECT id, slug
                FROM documents
                WHERE doc_set_id = ? AND (slug = ? OR uri = ?)
                LIMIT 1
                """,
                (row["doc_set_id"], candidate, candidate),
            ).fetchone()
            if document is not None:
                break

    # Nothing ingested for this symbol: hand back the raw objects.inv values.
    if document is None:
        return _page_uri(symbol_uri), symbol_anchor

    # Keep the anchor only if the document really has a section for it.
    if not symbol_anchor:
        return document["slug"], ""

    matching_section = conn.execute(
        """
        SELECT anchor
        FROM sections
        WHERE document_id = ? AND anchor = ?
        LIMIT 1
        """,
        (document["id"], symbol_anchor),
    ).fetchone()
    if matching_section is None:
        return document["slug"], ""
    return document["slug"], matching_section["anchor"] or ""
100+
101+
19102
def _normalize_scores(hits: list[SymbolHit]) -> list[SymbolHit]:
20103
"""Normalize BM25 scores to [0.1, 1.0] range.
21104
@@ -129,7 +212,8 @@ def search_symbols(
129212
try:
130213
cursor = conn.execute(
131214
"""
132-
SELECT sym.id, sym.qualified_name, sym.symbol_type, sym.uri,
215+
SELECT sym.id, sym.doc_set_id, sym.document_id, sym.section_id,
216+
sym.qualified_name, sym.symbol_type, sym.uri,
133217
sym.anchor, sym.module, d.version,
134218
bm25(symbols_fts, 10.0, 1.0) as score,
135219
snippet(symbols_fts, 0, '**', '**', '...', 32) as snippet_text
@@ -148,19 +232,19 @@ def search_symbols(
148232
logger.warning("FTS5 query failed for symbols: %r", match_expr)
149233
return []
150234

151-
hits = [
152-
SymbolHit(
235+
hits = []
236+
for row in rows:
237+
slug, anchor = _resolve_symbol_location(conn, row)
238+
hits.append(SymbolHit(
153239
uri=row["uri"],
154240
title=row["qualified_name"],
155241
kind=row["symbol_type"] or "symbol",
156242
snippet=row["snippet_text"] or "",
157243
score=row["score"],
158244
version=row["version"],
159-
slug=row["uri"].split("#")[0] if "#" in row["uri"] else row["uri"],
160-
anchor=row["anchor"] or "",
161-
)
162-
for row in rows
163-
]
245+
slug=slug,
246+
anchor=anchor,
247+
))
164248

165249
return _normalize_scores(hits)
166250

@@ -251,7 +335,8 @@ def lookup_symbols_exact(
251335

252336
cursor = conn.execute(
253337
"""
254-
SELECT s.qualified_name, s.symbol_type, s.uri, s.anchor,
338+
SELECT s.doc_set_id, s.document_id, s.section_id,
339+
s.qualified_name, s.symbol_type, s.uri, s.anchor,
255340
s.module, d.version
256341
FROM symbols s
257342
JOIN doc_sets d ON s.doc_set_id = d.id
@@ -264,16 +349,17 @@ def lookup_symbols_exact(
264349
)
265350
rows = cursor.fetchall()
266351

267-
return [
268-
SymbolHit(
352+
hits = []
353+
for row in rows:
354+
slug, anchor = _resolve_symbol_location(conn, row)
355+
hits.append(SymbolHit(
269356
uri=row["uri"],
270357
title=row["qualified_name"],
271358
kind=row["symbol_type"] or "symbol",
272359
snippet="",
273360
score=1.0 if row["qualified_name"] == query else 0.8,
274361
version=row["version"],
275-
slug=row["uri"].split("#")[0] if "#" in row["uri"] else row["uri"],
276-
anchor=row["anchor"] or "",
277-
)
278-
for row in rows
279-
]
362+
slug=slug,
363+
anchor=anchor,
364+
))
365+
return hits

tests/test_services.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,41 @@ def test_search_symbol_fast_path(self, populated_with_content):
102102
assert len(result.hits) >= 1
103103
assert result.hits[0].title == "asyncio.TaskGroup"
104104

105+
def test_symbol_hit_slug_is_retrievable_when_content_slug_is_extensionless(
    self,
    populated_db,
):
    """A symbol hit for an ``.html`` URI must resolve to the ingested slug.

    The document and its only section are stored under the extensionless
    slug ``library/json`` (section anchor ``''``), while the symbol's URI is
    ``library/json.html#json.dumps``. The hit is expected to carry the
    document slug and an empty anchor, and to be fetchable via ``get_docs``.
    """
    conn = populated_db

    # Arrange: one document, one anchorless section, and one symbol whose
    # insert sets neither document_id nor section_id.
    doc_set_id = conn.execute("SELECT id FROM doc_sets LIMIT 1").fetchone()[0]

    insert_document = (
        "INSERT INTO documents (doc_set_id, uri, slug, title, content_text, char_count) "
        "VALUES (?, 'library/json', 'library/json', 'json', "
        "'json page content', 17)"
    )
    conn.execute(insert_document, (doc_set_id,))
    doc_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]

    insert_section = (
        "INSERT INTO sections (document_id, uri, anchor, heading, level, ordinal, "
        "content_text, char_count) VALUES (?, 'library/json', '', 'Introduction', "
        "1, 0, 'json.dumps guidance for command line tools.', 43)"
    )
    conn.execute(insert_section, (doc_id,))

    insert_symbol = (
        "INSERT INTO symbols (doc_set_id, qualified_name, normalized_name, "
        "symbol_type, uri, anchor, module) VALUES (?, 'json.dumps', "
        "'json.dumps', 'function', 'library/json.html#json.dumps', "
        "'json.dumps', 'json')"
    )
    conn.execute(insert_symbol, (doc_set_id,))
    conn.commit()

    # Act: search for the symbol and take the top hit.
    search_result = SearchService(conn, {}).search(
        "json.dumps", version="3.13", kind="symbol"
    )
    hit = search_result.hits[0]

    # Assert: the hit points at the extensionless slug and is retrievable.
    assert hit.slug == "library/json"
    assert hit.anchor == ""
    docs = ContentService(conn).get_docs(hit.slug, hit.version, hit.anchor or None)
    assert "json.dumps guidance" in docs.content
139+
105140
def test_search_no_results(self, populated_with_content):
106141
svc = SearchService(populated_with_content, {})
107142
result = svc.search("nonexistent_xyz_symbol", kind="symbol")

0 commit comments

Comments
 (0)