Skip to content

Commit a5abde0

Browse files
DvirDukhanCopilot
andcommitted
fix(mcp): address PR #702 find_symbol review comments
- Validate/coerce limit: reject non-integers, accept stringified ints, and clamp to 1..FIND_SYMBOL_DB_LIMIT so a negative value no longer flips rows[:limit] into a surprising tail slice (_clamp_find_symbol_limit). - Reject empty symbol names up front (empty, whitespace, or a trailing-dot qualname) instead of silently querying for "". - Always include file_match in the response (False when no file filter is requested) so the documented shape is stable, and sort deterministically (in-file matches first, then file/line/name/id) so ordering no longer depends on FalkorDB row order. - Add tests/mcp/test_find_symbol.py covering simple + dotted-name resolution, file disambiguation + ordering, response-shape stability, deterministic order, and limit/empty-name validation. 70/70 mcp tests pass against live FalkorDB; ruff clean. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent f3eac12 commit a5abde0

2 files changed

Lines changed: 275 additions & 18 deletions

File tree

api/mcp/tools/structural.py

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,27 @@ async def _neighbors_payload(
739739
FIND_SYMBOL_DB_LIMIT = 200
740740

741741

742+
def _clamp_find_symbol_limit(limit: Any) -> int:
743+
"""Coerce ``limit`` to ``1..FIND_SYMBOL_DB_LIMIT``.
744+
745+
Agents may hand back a stringified or out-of-range ``limit``; a negative
746+
value would otherwise flip ``rows[:limit]`` into a surprising tail slice and
747+
a non-integer would fail deep in the slice with an opaque error. Strings of
748+
digits are accepted; anything else is rejected up front.
749+
"""
750+
if isinstance(limit, bool):
751+
raise ValueError(f"limit must be an integer, got bool: {limit!r}")
752+
if isinstance(limit, str) and limit.lstrip("-").isdigit():
753+
limit = int(limit)
754+
if not isinstance(limit, int):
755+
raise ValueError(f"limit must be an integer, got: {limit!r}")
756+
if limit < 1:
757+
return 1
758+
if limit > FIND_SYMBOL_DB_LIMIT:
759+
return FIND_SYMBOL_DB_LIMIT
760+
return limit
761+
762+
742763
@app.tool(
743764
name="find_symbol",
744765
description=(
@@ -767,14 +788,20 @@ async def find_symbol(
767788
"""Look up Function/Class nodes by their simple name.
768789
769790
Symbol nodes store only the SIMPLE name (``foo``), never the qualname
770-
(``Bar.foo``), so a dotted input is reduced to its last segment. When
771-
``file`` is given we do not silently widen the search: every candidate is
772-
tagged ``file_match`` and the in-file ones sort first, so a wrong/empty
773-
file filter degrades visibly (the agent still sees the global matches but
774-
knows none were in the requested file) rather than routing a relationship
775-
query to an arbitrary same-named symbol.
791+
(``Bar.foo``), so a dotted input is reduced to its last segment. Every
792+
candidate carries a ``file_match`` flag (always ``False`` when no ``file``
793+
filter is requested); when ``file`` is given we do not silently widen the
794+
search — the in-file ones sort first, so a wrong/empty file filter degrades
795+
visibly (the agent still sees the global matches but knows none were in the
796+
requested file) rather than routing a relationship query to an arbitrary
797+
same-named symbol.
776798
"""
777-
simple = str(name).strip().split(".")[-1]
799+
simple = str(name).strip().split(".")[-1].strip()
800+
if not simple:
801+
raise ValueError(
802+
f"name must be a non-empty symbol name, got: {name!r}"
803+
)
804+
eff_limit = _clamp_find_symbol_limit(limit)
778805
g = _project_arg(project, branch)
779806
try:
780807
res = await g._query(
@@ -796,19 +823,28 @@ async def find_symbol(
796823
for r in rows:
797824
r["symbol_id"] = r.pop("id")
798825

799-
if file is not None:
800-
needle = str(file).strip().lstrip("/")
826+
needle = str(file).strip().lstrip("/") if file is not None else ""
801827

802-
def _matches(r: dict[str, Any]) -> bool:
803-
fp = r.get("file") or ""
804-
return bool(needle) and (fp == needle or fp.endswith(needle) or needle in fp)
828+
def _matches(r: dict[str, Any]) -> bool:
829+
fp = r.get("file") or ""
830+
return bool(needle) and (fp == needle or fp.endswith(needle) or needle in fp)
805831

806-
for r in rows:
807-
r["file_match"] = _matches(r)
808-
# In-file matches first; preserve relative order within each group.
809-
rows.sort(key=lambda r: not r["file_match"])
810-
811-
return rows[:limit]
832+
# ``file_match`` is part of the documented response shape, so set it on
833+
# every row (False when no ``file`` filter is requested) rather than only
834+
# when ``file`` is given. Sort deterministically — in-file matches first,
835+
# then by file/line/name/id — so ordering never depends on FalkorDB row
836+
# order between runs.
837+
for r in rows:
838+
r["file_match"] = _matches(r)
839+
rows.sort(key=lambda r: (
840+
not r["file_match"],
841+
r.get("file") or "",
842+
r["line"] if r.get("line") is not None else math.inf,
843+
r.get("name") or "",
844+
r["symbol_id"],
845+
))
846+
847+
return rows[:eff_limit]
812848

813849

814850
@app.tool(

tests/mcp/test_find_symbol.py

Lines changed: 221 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,221 @@
1+
"""T9 — find_symbol MCP tool tests.
2+
3+
Covers name resolution (simple + dotted qualname reduction), the ``file``
4+
disambiguation flag + ordering, response-shape stability, deterministic order,
5+
and ``limit`` / empty-name validation.
6+
"""
7+
8+
from __future__ import annotations
9+
10+
import uuid
11+
12+
import pytest
13+
14+
15+
pytestmark = pytest.mark.anyio
16+
17+
18+
@pytest.fixture
19+
def anyio_backend() -> str:
20+
return "asyncio"
21+
22+
23+
# ---------------------------------------------------------------------------
24+
# Validation — no FalkorDB required (these raise before touching the graph)
25+
# ---------------------------------------------------------------------------
26+
27+
28+
@pytest.mark.parametrize("bad", ["", " ", "Foo.", "Bar. "])
29+
async def test_find_symbol_rejects_empty_name(bad):
30+
from api.mcp.tools.structural import find_symbol
31+
32+
with pytest.raises(ValueError, match="non-empty symbol name"):
33+
await find_symbol(name=bad, project="any")
34+
35+
36+
@pytest.mark.parametrize("bad", ["not-a-number", 1.5, None, True])
37+
async def test_find_symbol_rejects_garbage_limit(bad):
38+
from api.mcp.tools.structural import find_symbol
39+
40+
with pytest.raises(ValueError, match="limit"):
41+
await find_symbol(name="entrypoint", project="any", limit=bad)
42+
43+
44+
# ---------------------------------------------------------------------------
45+
# Integration — sample_project fixture
46+
# ---------------------------------------------------------------------------
47+
48+
49+
async def test_find_symbol_resolves_simple_name(indexed_fixture):
50+
from api.mcp.tools.structural import find_symbol
51+
52+
rows = await find_symbol(
53+
name="entrypoint",
54+
project=indexed_fixture.project,
55+
branch=indexed_fixture.branch,
56+
)
57+
assert rows, "entrypoint must resolve"
58+
r = rows[0]
59+
assert isinstance(r["symbol_id"], int)
60+
assert r["name"] == "entrypoint"
61+
assert r["label"] == "Function"
62+
assert r["file"].endswith("entrypoint.py")
63+
assert isinstance(r["line"], int)
64+
# file_match is part of the documented shape and present even without a
65+
# ``file`` filter (False when none requested).
66+
assert r["file_match"] is False
67+
68+
69+
async def test_find_symbol_reduces_dotted_qualname(indexed_fixture):
70+
"""``Class.method`` must resolve identically to the bare ``method``."""
71+
from api.mcp.tools.structural import find_symbol
72+
73+
dotted = await find_symbol(
74+
name="BaseRepo.repo",
75+
project=indexed_fixture.project,
76+
branch=indexed_fixture.branch,
77+
)
78+
simple = await find_symbol(
79+
name="repo",
80+
project=indexed_fixture.project,
81+
branch=indexed_fixture.branch,
82+
)
83+
assert {r["symbol_id"] for r in dotted} == {r["symbol_id"] for r in simple}
84+
assert all(r["name"] == "repo" for r in dotted)
85+
# Fixture defines repo() on BaseRepo, UserRepo and OrderRepo.
86+
assert len(simple) == 3
87+
88+
89+
async def test_find_symbol_file_match_shape_stable(indexed_fixture):
90+
"""Every row carries a boolean ``file_match`` regardless of the args."""
91+
from api.mcp.tools.structural import find_symbol
92+
93+
rows = await find_symbol(
94+
name="repo",
95+
project=indexed_fixture.project,
96+
branch=indexed_fixture.branch,
97+
)
98+
assert rows
99+
for r in rows:
100+
assert isinstance(r["file_match"], bool)
101+
assert set(r) >= {"symbol_id", "name", "label", "file", "line", "file_match"}
102+
103+
104+
async def test_find_symbol_file_filter_flags_matches(indexed_fixture):
105+
"""A matching ``file`` flags every in-file candidate; a non-matching one
106+
still returns the global matches but flags none."""
107+
from api.mcp.tools.structural import find_symbol
108+
109+
in_file = await find_symbol(
110+
name="repo",
111+
project=indexed_fixture.project,
112+
branch=indexed_fixture.branch,
113+
file="repo.py",
114+
)
115+
assert in_file and all(r["file_match"] for r in in_file)
116+
117+
no_file = await find_symbol(
118+
name="repo",
119+
project=indexed_fixture.project,
120+
branch=indexed_fixture.branch,
121+
file="does_not_exist.py",
122+
)
123+
# Search is not silently widened away — global matches still surface…
124+
assert {r["symbol_id"] for r in no_file} == {r["symbol_id"] for r in in_file}
125+
# …but the agent can see none were in the requested file.
126+
assert all(r["file_match"] is False for r in no_file)
127+
128+
129+
async def test_find_symbol_deterministic_order(indexed_fixture):
130+
"""Repeated calls return rows in the same order (no FalkorDB row-order
131+
dependence)."""
132+
from api.mcp.tools.structural import find_symbol
133+
134+
a = await find_symbol(
135+
name="repo",
136+
project=indexed_fixture.project,
137+
branch=indexed_fixture.branch,
138+
)
139+
b = await find_symbol(
140+
name="repo",
141+
project=indexed_fixture.project,
142+
branch=indexed_fixture.branch,
143+
)
144+
assert [r["symbol_id"] for r in a] == [r["symbol_id"] for r in b]
145+
146+
147+
async def test_find_symbol_honors_limit(indexed_fixture):
148+
from api.mcp.tools.structural import find_symbol
149+
150+
rows = await find_symbol(
151+
name="repo",
152+
project=indexed_fixture.project,
153+
branch=indexed_fixture.branch,
154+
limit="1", # stringified ints are accepted and coerced
155+
)
156+
assert len(rows) == 1
157+
158+
159+
async def test_find_symbol_negative_limit_clamped_not_tail_slice(indexed_fixture):
160+
"""A negative ``limit`` must clamp to 1, NOT flip ``rows[:limit]`` into a
161+
surprising tail slice."""
162+
from api.mcp.tools.structural import find_symbol
163+
164+
full = await find_symbol(
165+
name="repo",
166+
project=indexed_fixture.project,
167+
branch=indexed_fixture.branch,
168+
)
169+
clamped = await find_symbol(
170+
name="repo",
171+
project=indexed_fixture.project,
172+
branch=indexed_fixture.branch,
173+
limit=-2,
174+
)
175+
assert len(clamped) == 1
176+
assert clamped[0]["symbol_id"] == full[0]["symbol_id"]
177+
178+
179+
async def test_find_symbol_registered():
180+
from api.mcp.server import app
181+
182+
names = {t.name for t in await app.list_tools()}
183+
assert "find_symbol" in names
184+
185+
186+
# ---------------------------------------------------------------------------
187+
# Ordering across files — purpose-built graph (two same-named symbols)
188+
# ---------------------------------------------------------------------------
189+
190+
191+
@pytest.fixture
192+
async def two_file_graph():
193+
"""Two ``foo`` Functions in different files so ``file`` disambiguation can
194+
actually reorder the result. Unique branch keeps it isolated; not torn down
195+
(matches the ``indexed_fixture`` / ``cycle_graph`` pattern)."""
196+
from api.graph import Graph
197+
198+
project = "find_symbol_order_test"
199+
branch = f"order-{uuid.uuid4().hex[:8]}"
200+
g = Graph(project, branch=branch)
201+
g.g.query(
202+
"""
203+
CREATE
204+
(:Function:Searchable {name: 'foo', path: '/tmp/aaa.py', src_start: 1}),
205+
(:Function:Searchable {name: 'foo', path: '/tmp/bbb.py', src_start: 1})
206+
"""
207+
)
208+
yield project, branch
209+
210+
211+
async def test_find_symbol_orders_in_file_first(two_file_graph):
212+
from api.mcp.tools.structural import find_symbol
213+
214+
project, branch = two_file_graph
215+
216+
rows = await find_symbol(name="foo", project=project, branch=branch, file="bbb.py")
217+
assert len(rows) == 2
218+
# The requested file sorts first and is the only flagged match.
219+
assert rows[0]["file"].endswith("bbb.py")
220+
assert rows[0]["file_match"] is True
221+
assert rows[1]["file_match"] is False

0 commit comments

Comments
 (0)