Skip to content

Commit 9973ff5

Browse files
committed
Add DataSource parameter for artifacts endpoints
1 parent 49091e7 commit 9973ff5

7 files changed

Lines changed: 277 additions & 8 deletions

src/tests/test_artifact_relationships.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,80 @@ async def test_explicit_profile_maps_correctly(self, mock_get_api_key):
372372

373373
call_args = mock_client.post.call_args
374374
assert call_args[1]["json"]["profile"] == "InheritanceOnly"
375+
# No data_source supplied => omitted from the body.
376+
assert "dataSource" not in call_args[1]["json"]
377+
378+
@pytest.mark.asyncio
379+
@patch("tools.artifact_relationships.get_api_key_from_context")
380+
async def test_forwards_data_source(self, mock_get_api_key):
381+
mock_get_api_key.return_value = "test_key"
382+
383+
ctx = MagicMock(spec=Context)
384+
ctx.debug = AsyncMock()
385+
ctx.error = AsyncMock()
386+
387+
mock_response = MagicMock()
388+
mock_response.json.return_value = {
389+
"sourceIdentifier": "id",
390+
"profile": "CallsOnly",
391+
"found": True,
392+
"relationships": [],
393+
}
394+
mock_response.raise_for_status = MagicMock()
395+
396+
mock_client = AsyncMock()
397+
mock_client.post.return_value = mock_response
398+
399+
mock_context = MagicMock()
400+
mock_context.client = mock_client
401+
mock_context.base_url = "https://app.codealive.ai"
402+
ctx.request_context.lifespan_context = mock_context
403+
404+
await get_artifact_relationships(
405+
ctx=ctx,
406+
identifier="id",
407+
data_source="repo (main)",
408+
)
409+
410+
assert mock_client.post.call_args[1]["json"]["dataSource"] == "repo (main)"
411+
412+
@pytest.mark.asyncio
413+
@patch("tools.artifact_relationships.get_api_key_from_context")
414+
async def test_ambiguous_409_surfaces_candidate_data_sources(self, mock_get_api_key):
415+
import httpx
416+
417+
mock_get_api_key.return_value = "test_key"
418+
419+
ctx = MagicMock(spec=Context)
420+
ctx.debug = AsyncMock()
421+
ctx.error = AsyncMock()
422+
423+
mock_response = MagicMock()
424+
mock_response.status_code = 409
425+
mock_response.text = (
426+
'{"detail": "Identifier matches 2 data sources: '
427+
"Name='repo (main)' Id='ds-main', Name='repo (master)' Id='ds-master'\"}"
428+
)
429+
mock_response.raise_for_status.side_effect = httpx.HTTPStatusError(
430+
"Conflict", request=MagicMock(), response=mock_response
431+
)
432+
433+
mock_client = AsyncMock()
434+
mock_client.post.return_value = mock_response
435+
436+
mock_context = MagicMock()
437+
mock_context.client = mock_client
438+
mock_context.base_url = "https://app.codealive.ai"
439+
ctx.request_context.lifespan_context = mock_context
440+
441+
with pytest.raises(ToolError) as exc:
442+
await get_artifact_relationships(ctx=ctx, identifier="org/repo::path::Symbol")
443+
444+
message = str(exc.value)
445+
assert "409" in message
446+
# The candidate data sources from the backend 409 must be surfaced, plus the data_source retry hint.
447+
assert "repo (main)" in message and "repo (master)" in message
448+
assert "data_source" in message
375449

376450
@pytest.mark.asyncio
377451
async def test_empty_identifier_raises_tool_error(self):
@@ -446,3 +520,21 @@ async def test_not_found_response_renders_correctly(self, mock_get_api_key):
446520

447521
assert data["found"] is False
448522
assert "relationships" not in data
523+
524+
def test_not_found_hint_with_data_source_suggests_retry_or_omit(self):
525+
payload = _build_relationships_dict(
526+
{"sourceIdentifier": "org/repo::path::S", "profile": "CallsOnly", "found": False},
527+
data_source="repo (main)",
528+
)
529+
hint = payload["hint"]
530+
assert "repo (main)" in hint
531+
assert "data_source" in hint
532+
assert "omit" in hint.lower()
533+
534+
def test_not_found_hint_without_data_source_is_generic(self):
535+
payload = _build_relationships_dict(
536+
{"sourceIdentifier": "org/repo::path::S", "profile": "CallsOnly", "found": False},
537+
)
538+
hint = payload["hint"]
539+
assert "data_source" not in hint
540+
assert "fresh identifier" in hint

src/tests/test_fetch_artifacts.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,39 @@ def test_hint_absent_when_no_artifacts_have_content(self):
365365
assert "<hint>" not in result
366366

367367

368+
class TestBuildArtifactsXmlDataSourceMissHint:
369+
"""When a data_source was supplied but nothing was found, hint to retry or drop it."""
370+
371+
def test_hint_when_data_source_scoped_returns_nothing(self):
372+
data = {"artifacts": [
373+
{"identifier": "repo::a.ts::F", "content": None, "contentByteSize": None},
374+
]}
375+
result = _build_artifacts_xml(data, data_source="repo (main)")
376+
assert "<hint>" in result
377+
assert "repo (main)" in result
378+
# Guides toward the two recovery moves.
379+
assert "data_source" in result
380+
assert "omit" in result.lower()
381+
382+
def test_hint_when_empty_artifacts_and_data_source(self):
383+
result = _build_artifacts_xml({"artifacts": []}, data_source="ds-main")
384+
assert "ds-main" in result and "<hint>" in result
385+
386+
def test_no_miss_hint_when_data_source_resolved_content(self):
387+
data = {"artifacts": [
388+
{"identifier": "repo::a.ts::F", "content": "code", "contentByteSize": 4},
389+
]}
390+
result = _build_artifacts_xml(data, data_source="repo (main)")
391+
assert "omit data_source" not in result
392+
393+
def test_no_miss_hint_without_data_source(self):
394+
data = {"artifacts": [
395+
{"identifier": "repo::a.ts::F", "content": None, "contentByteSize": None},
396+
]}
397+
result = _build_artifacts_xml(data)
398+
assert "<hint>" not in result
399+
400+
368401
@pytest.mark.asyncio
369402
@patch('tools.fetch_artifacts.get_api_key_from_context')
370403
async def test_fetch_artifacts_returns_xml(mock_get_api_key):
@@ -476,6 +509,43 @@ async def test_fetch_artifacts_posts_correct_body(mock_get_api_key):
476509
body = call_args.kwargs["json"]
477510
assert body["identifiers"] == ["id1", "id2"]
478511
assert "names" not in body
512+
# No data_source supplied => the field is omitted (preserves the 409-on-ambiguity fallback).
513+
assert "dataSource" not in body
514+
515+
516+
@pytest.mark.asyncio
517+
@patch('tools.fetch_artifacts.get_api_key_from_context')
518+
async def test_fetch_artifacts_forwards_data_source(mock_get_api_key):
519+
"""data_source (Name or Id) is forwarded as the `dataSource` body field when provided."""
520+
mock_get_api_key.return_value = "test_key"
521+
522+
ctx = MagicMock(spec=Context)
523+
ctx.info = AsyncMock()
524+
ctx.warning = AsyncMock()
525+
ctx.error = AsyncMock()
526+
527+
mock_response = MagicMock()
528+
mock_response.json.return_value = {"artifacts": []}
529+
mock_response.raise_for_status = MagicMock()
530+
531+
mock_client = AsyncMock()
532+
mock_client.post.return_value = mock_response
533+
534+
mock_codealive_context = MagicMock()
535+
mock_codealive_context.client = mock_client
536+
mock_codealive_context.base_url = "https://app.codealive.ai"
537+
538+
ctx.request_context.lifespan_context = mock_codealive_context
539+
ctx.request_context.headers = {"authorization": "Bearer test_key"}
540+
541+
await fetch_artifacts(
542+
ctx=ctx,
543+
identifiers=["id1"],
544+
data_source="repo (main)",
545+
)
546+
547+
body = mock_client.post.call_args.kwargs["json"]
548+
assert body["dataSource"] == "repo (main)"
479549

480550

481551
@pytest.mark.asyncio

src/tests/test_response_transformer.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,10 +294,34 @@ def test_data_preservation(self):
294294
assert first["identifier"] == "CodeAlive-AI/codealive-mcp::src/tools/search.py::codebase_search"
295295
assert first["contentByteSize"] == 8500
296296
assert first["description"] == "Main search function"
297+
# Data-source identity must be surfaced (not stripped) so the agent can feed it back
298+
# as `data_source` to disambiguate a branch-blind identifier.
299+
assert first["dataSource"] == {"id": "685b21230e3822f4efa9d073", "name": "codealive-mcp"}
297300

298301
assert second["path"] == "README.md"
299302
assert second["kind"] == "Chunk"
300303
assert second["description"] == "Search documentation section"
304+
assert second["dataSource"] == {"id": "685b21230e3822f4efa9d073", "name": "codealive-mcp"}
305+
306+
def test_grep_transform_surfaces_data_source(self):
307+
response = {
308+
"results": [
309+
{
310+
"kind": "File",
311+
"identifier": "owner/repo::src/auth.py",
312+
"location": {"path": "src/auth.py"},
313+
"matchCount": 1,
314+
"matches": [
315+
{"lineNumber": 3, "startColumn": 0, "endColumn": 4, "lineText": "auth"}
316+
],
317+
"dataSource": {"type": "repository", "id": "ds-main", "name": "repo (main)"},
318+
}
319+
]
320+
}
321+
322+
result = transform_grep_response(response)
323+
324+
assert result["results"][0]["dataSource"] == {"id": "ds-main", "name": "repo (main)"}
301325

302326
def test_grep_transform_preserves_match_previews(self):
303327
response = {

src/tools/artifact_relationships.py

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Artifact relationships tool implementation."""
22

3-
from typing import Any, Dict, List, Literal
3+
from typing import Any, Dict, List, Literal, Optional
44
from urllib.parse import urljoin
55

66
import httpx
@@ -37,6 +37,7 @@ async def get_artifact_relationships(
3737
identifier: str,
3838
profile: Literal["callsOnly", "inheritanceOnly", "allRelevant", "referencesOnly"] = "callsOnly",
3939
max_count_per_type: int = 50,
40+
data_source: Optional[str] = None,
4041
) -> Dict[str, Any]:
4142
"""
4243
Retrieve relationship groups for a single artifact by profile.
@@ -84,6 +85,11 @@ async def get_artifact_relationships(
8485
- "allRelevant": calls + inheritance only; references are excluded
8586
- "referencesOnly": where-used LSP references for non-call usage
8687
max_count_per_type: Maximum related artifacts per relationship type (1–1000, default 50).
88+
data_source: Optional data-source Name or Id used to disambiguate an identifier that
89+
exists in more than one data source. Copy the `dataSource.name` or
90+
`dataSource.id` from a search result. Omit it for normal lookups; if the
91+
source identifier is ambiguous and you omit it, the backend returns a 409
92+
listing the candidate data sources.
8793
8894
Returns:
8995
A dict with grouped relationships:
@@ -103,6 +109,7 @@ async def get_artifact_relationships(
103109
"identifier": identifier,
104110
"profile": profile,
105111
"max_count_per_type": max_count_per_type,
112+
"data_source": data_source,
106113
}
107114

108115
if not identifier:
@@ -143,6 +150,8 @@ async def get_artifact_relationships(
143150
"profile": api_profile,
144151
"maxCountPerType": max_count_per_type,
145152
}
153+
if data_source:
154+
body["dataSource"] = data_source
146155

147156
await ctx.debug(f"Fetching {profile} relationships for artifact")
148157

@@ -156,7 +165,7 @@ async def get_artifact_relationships(
156165
log_api_response(response, request_id)
157166
response.raise_for_status()
158167

159-
return _build_relationships_dict(response.json())
168+
return _build_relationships_dict(response.json(), data_source=data_source)
160169

161170
except (httpx.HTTPStatusError, Exception) as e:
162171
logger.bind(
@@ -173,15 +182,24 @@ async def get_artifact_relationships(
173182
"(2) call semantic_search or grep_search again to get a fresh identifier — the index may have changed, "
174183
"(3) check that the artifact is a function/class (relationships are not available for non-symbol artifacts)"
175184
),
185+
409: (
186+
"(1) the identifier exists in more than one data source — see the candidate data sources in the Detail above; each one will resolve, "
187+
"(2) retry get_artifact_relationships with data_source set to one candidate's Name or Id; if that data source isn't the one you want, retry with the next candidate, "
188+
"(3) do NOT invent relation results — pick from the listed data sources"
189+
),
176190
},
177191
)
178192

179193

180-
def _build_relationships_dict(data: dict) -> Dict[str, Any]:
194+
def _build_relationships_dict(data: dict, data_source: Optional[str] = None) -> Dict[str, Any]:
181195
"""Build a dict representation of an artifact relationships response.
182196
183197
FastMCP serializes the dict via pydantic_core.to_json, which preserves UTF-8 —
184198
don't reintroduce json.dumps here, it would re-escape non-ASCII identifiers.
199+
200+
``data_source`` is the selector the caller passed (if any); when the source is not
201+
found it shapes the recovery hint so the agent can retry with another data source
202+
or drop the selector.
185203
"""
186204
raw_source_id = data.get("sourceIdentifier") or ""
187205
raw_profile = data.get("profile") or ""
@@ -208,9 +226,9 @@ def _build_relationships_dict(data: dict) -> Dict[str, Any]:
208226
counts = _build_counts(data.get("availableRelationshipCounts"))
209227
if counts is not None:
210228
payload["availableRelationshipCounts"] = counts
211-
payload["hint"] = _build_relationship_hint(found, mcp_profile, groups, counts)
229+
payload["hint"] = _build_relationship_hint(found, mcp_profile, groups, counts, data_source)
212230
else:
213-
payload["hint"] = _build_relationship_hint(found, mcp_profile, [], None)
231+
payload["hint"] = _build_relationship_hint(found, mcp_profile, [], None, data_source)
214232

215233
return payload
216234

@@ -266,9 +284,19 @@ def _build_relationship_hint(
266284
profile: str,
267285
groups: List[Dict[str, Any]],
268286
counts: Dict[str, int] | None,
287+
data_source: Optional[str] = None,
269288
) -> str:
270289
"""Give model-facing next-step guidance for graph traversal results."""
271290
if not found:
291+
if data_source:
292+
return (
293+
f'No relationship data was found for this identifier in data source "{data_source}". '
294+
"The identifier may belong to a different data source, or the data_source value may be "
295+
"wrong. Try: re-run with data_source set to a different candidate (use the `dataSource` "
296+
"name or id from your search results, or call get_data_sources), or omit data_source "
297+
"entirely — if the identifier is ambiguous you then get a 409 listing the candidate data "
298+
"sources. Otherwise re-run semantic_search or grep_search to get a fresh identifier."
299+
)
272300
return (
273301
"No relationship data was found for this identifier. Verify that the identifier came from "
274302
"a recent search/fetch result and points to a symbol-level artifact; otherwise re-run "

0 commit comments

Comments
 (0)