CodeAlive-AI
diff --git a/CLAUDE.md b/CLAUDE.md
Lines changed: 21 additions & 13 deletions
diff --git a/src/tests/test_artifact_relationships.py b/src/tests/test_artifact_relationships.py
Lines changed: 13 additions & 25 deletions
diff --git a/src/tests/test_datasources.py b/src/tests/test_datasources.py
Lines changed: 5 additions & 16 deletions
diff --git a/src/tests/test_e2e_tools.py b/src/tests/test_e2e_tools.py
Lines changed: 107 additions & 1 deletion
@@ -118,7 +118,7 @@ This is a Model Context Protocol (MCP) server that provides AI clients with acce
 2. Client calls tools (`get_data_sources` → `semantic_search` / `grep_search` → `fetch_artifacts` / `get_artifact_relationships` → `chat` only if synthesis is still needed)
 3. Middleware chain runs: N8N cleanup → ObservabilityMiddleware (OTel span + log correlation)
 4. Tool translates MCP call to CodeAlive API request (with `X-CodeAlive-*` headers)
-5. Response parsed, formatted as XML or text, returned to AI client
+5. Response parsed and returned to the AI client — as a `dict` (or list of dicts) for metadata/discovery tools, as an XML string for `fetch_artifacts`, or as plain text for `chat`
 
 ### Environment Variables
 
@@ -205,18 +205,26 @@ When adding a new tool, ensure:
 
 ## Tool Response Conventions
 
-### Response format: dict for metadata, XML for content
-
-Tools that return **search metadata** (identifiers, match counts, line numbers)
-return a `dict`. FastMCP serializes it automatically via `pydantic_core.to_json`,
-which preserves Unicode — no manual `json.dumps()` needed. Examples:
-`semantic_search`, `grep_search`, `codebase_search`.
-
-Tools that return **source code content** return an **XML string**. XML tags give
-the LLM clear structural boundaries between artifacts, content blocks, and
-relationships — this is critical for accurate reasoning over multi-artifact
-responses. **Do not convert `fetch_artifacts` or `get_artifact_relationships`
-to dict/JSON** — the XML structure is intentional.
+### Response format: dict for metadata/discovery, XML only for source code
+
+Tools that return **structured metadata** (identifiers, match counts, line
+numbers, relationship groups, data source listings) return a `dict` (or list of
+dicts). FastMCP serializes it automatically via `pydantic_core.to_json`, which
+preserves Unicode — no manual `json.dumps()` needed. Examples:
+`semantic_search`, `grep_search`, `codebase_search`, `get_data_sources`,
+`get_artifact_relationships`.
+
+**Do not call `json.dumps(...)` in a tool's return path.** Python's `json.dumps`
+defaults to `ensure_ascii=True`, which escapes non-ASCII text (Cyrillic, CJK,
+etc.) to `\uXXXX`. Returning a `dict` lets FastMCP route through
+`pydantic_core.to_json`, which emits UTF-8. In the rare case where manual
+serialization is unavoidable, pass `ensure_ascii=False` explicitly.
+
+Only `fetch_artifacts` returns an **XML string**. XML tags give the LLM clear
+structural boundaries between artifacts, content blocks, and inline
+relationships when streaming source code — this is critical for accurate
+reasoning over multi-artifact responses. **Do not convert `fetch_artifacts` to
+dict/JSON** — the XML structure is intentional.
 
 ### Hint other MCP tools when the response implies a follow-up call
 
 
@@ -1,7 +1,5 @@
 """Tests for the get_artifact_relationships tool."""
 
-import json
-
 import pytest
 from unittest.mock import AsyncMock, MagicMock, patch
 
@@ -10,7 +8,7 @@
 
 from tools.artifact_relationships import (
     PROFILE_MAP,
-    _build_relationships_json,
+    _build_relationships_dict,
     get_artifact_relationships,
 )
 
@@ -32,8 +30,8 @@ def test_references_only_maps_correctly(self):
         assert PROFILE_MAP["referencesOnly"] == "ReferencesOnly"
 
 
-class TestBuildRelationshipsJson:
-    """Test compact JSON rendering of relationship responses."""
+class TestBuildRelationshipsDict:
+    """Test dict shape of relationship responses (FastMCP handles serialization)."""
 
     def test_found_with_grouped_relationships(self):
         data = {
@@ -71,11 +69,7 @@ def test_found_with_grouped_relationships(self):
             ],
         }
 
-        result = _build_relationships_json(data)
-        # Compact JSON
-        assert ", " not in result and ": " not in result
-
-        parsed = json.loads(result)
+        parsed = _build_relationships_dict(data)
         assert parsed["sourceIdentifier"] == "org/repo::path::Symbol"
         assert parsed["profile"] == "callsOnly"
         assert parsed["found"] is True
@@ -103,7 +97,7 @@ def test_not_found_omits_relationships(self):
             "relationships": [],
         }
 
-        parsed = json.loads(_build_relationships_json(data))
+        parsed = _build_relationships_dict(data)
         assert parsed["found"] is False
         assert "relationships" not in parsed
 
@@ -130,7 +124,7 @@ def test_empty_groups_still_rendered(self):
             ],
         }
 
-        parsed = json.loads(_build_relationships_json(data))
+        parsed = _build_relationships_dict(data)
         types = [g["type"] for g in parsed["relationships"]]
         assert types == ["ancestors", "descendants"]
         for g in parsed["relationships"]:
@@ -158,14 +152,15 @@ def test_optional_fields_omitted_when_null(self):
             ],
         }
 
-        parsed = json.loads(_build_relationships_json(data))
+        parsed = _build_relationships_dict(data)
         item = parsed["relationships"][0]["items"][0]
         assert item["identifier"] == "org/repo::path::Target"
         assert "filePath" not in item
         assert "startLine" not in item
         assert "shortSummary" not in item
 
-    def test_quotes_and_specials_use_json_escaping(self):
+    def test_quotes_and_specials_pass_through_unchanged(self):
+        """Special chars (<, >, &, ") are preserved as-is in the dict — no HTML encoding."""
         data = {
             "sourceIdentifier": "org/repo::path::Class<T>",
             "profile": "CallsOnly",
@@ -186,13 +181,7 @@ def test_quotes_and_specials_use_json_escaping(self):
             ],
         }
 
-        result = _build_relationships_json(data)
-        # No HTML entity encoding in JSON output
-        assert "&quot;" not in result
-        assert "&amp;" not in result
-        assert "&lt;" not in result
-
-        parsed = json.loads(result)
+        parsed = _build_relationships_dict(data)
         assert parsed["sourceIdentifier"] == "org/repo::path::Class<T>"
         assert parsed["relationships"][0]["items"][0]["identifier"] == "org/repo::path::Method<T>"
         assert parsed["relationships"][0]["items"][0]["shortSummary"] == 'Returns "value" & more'
@@ -206,7 +195,7 @@ def test_profile_mapped_back_to_mcp_name(self):
                 "found": False,
                 "relationships": [],
             }
-            parsed = json.loads(_build_relationships_json(data))
+            parsed = _build_relationships_dict(data)
             assert parsed["profile"] == mcp_name
 
 
@@ -247,7 +236,7 @@ async def test_default_profile_sends_calls_only(self, mock_get_api_key):
         # Verify the API was called with CallsOnly profile
         call_args = mock_client.post.call_args
         assert call_args[1]["json"]["profile"] == "CallsOnly"
-        assert json.loads(result)["found"] is True
+        assert result["found"] is True
 
     @pytest.mark.asyncio
     @patch("tools.artifact_relationships.get_api_key_from_context")
@@ -353,8 +342,7 @@ async def test_not_found_response_renders_correctly(self, mock_get_api_key):
         mock_context.base_url = "https://app.codealive.ai"
         ctx.request_context.lifespan_context = mock_context
 
-        result = await get_artifact_relationships(ctx=ctx, identifier="org/repo::path::Missing")
+        data = await get_artifact_relationships(ctx=ctx, identifier="org/repo::path::Missing")
 
-        data = json.loads(result)
         assert data["found"] is False
         assert "relationships" not in data
@@ -1,6 +1,5 @@
 """Tests for data sources tool."""
 
-import json
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
@@ -50,11 +49,8 @@ async def test_get_data_sources_removes_repository_ids_from_workspaces(mock_get_
 
     mock_ctx.request_context.lifespan_context = mock_lifespan_context
 
-    # Call the function
-    result = await get_data_sources(mock_ctx, alive_only=True)
-
-    # Result is a compact JSON array
-    data_sources = json.loads(result)
+    # Tool returns the parsed list directly; FastMCP serializes it.
+    data_sources = await get_data_sources(mock_ctx, alive_only=True)
 
     # Verify repository still has all fields
     repo = next(ds for ds in data_sources if ds["type"] == "Repository")
@@ -116,11 +112,7 @@ async def test_get_data_sources_preserves_other_workspace_fields(mock_get_api_ke
 
     mock_ctx.request_context.lifespan_context = mock_lifespan_context
 
-    # Call the function
-    result = await get_data_sources(mock_ctx, alive_only=True)
-
-    # Result is compact JSON array
-    data_sources = json.loads(result)
+    data_sources = await get_data_sources(mock_ctx, alive_only=True)
 
     workspace = data_sources[0]
 
@@ -167,11 +159,8 @@ async def test_get_data_sources_handles_missing_repository_ids(mock_get_api_key)
 
     mock_ctx.request_context.lifespan_context = mock_lifespan_context
 
-    # Call the function - should not raise an error
-    result = await get_data_sources(mock_ctx, alive_only=True)
-
-    # Result is compact JSON array
-    data_sources = json.loads(result)
+    # Should not raise an error
+    data_sources = await get_data_sources(mock_ctx, alive_only=True)
 
     # Verify workspace is intact
     workspace = data_sources[0]
 
@@ -133,6 +133,25 @@ async def test_empty_list_returns_message(self):
         assert data["dataSources"] == []
         assert "No data sources found" in data["message"]
 
+    @pytest.mark.asyncio
+    async def test_unicode_preserved_in_response(self):
+        """Cyrillic in name/description must survive as UTF-8, not \\uXXXX."""
+        payload = [
+            {"id": "r1", "name": "кирилл-репо", "type": "Repository",
+             "description": "Описание про принтеры HPRT"},
+        ]
+
+        mcp = _server({"/api/datasources/ready": lambda r: httpx.Response(200, json=payload)})
+        async with Client(mcp) as client:
+            result = await client.call_tool("get_data_sources", {})
+
+        text = _text(result)
+        # Round-trip via ensure_ascii=False — ASCII-escaped output would mismatch.
+        assert text == json.dumps(json.loads(text), separators=(",", ":"), ensure_ascii=False)
+        assert "кирилл-репо" in text
+        assert "Описание про принтеры HPRT" in text
+        assert "\\u04" not in text
+
     @pytest.mark.asyncio
     async def test_alive_only_false_hits_all_endpoint(self):
         hit = []
@@ -497,6 +516,32 @@ async def test_404_includes_recovery_hint(self):
         assert result.is_error
         assert "get_data_sources" in text
 
+    @pytest.mark.asyncio
+    async def test_unicode_preserved_in_response(self):
+        """Cyrillic in path/description must survive as UTF-8, not \\uXXXX."""
+        payload = {
+            "results": [
+                {
+                    "kind": "File",
+                    "identifier": "org/repo::база/file.md::",
+                    "description": "Описание про принтеры HPRT",
+                    "location": {"path": "база/file.md"},
+                    "contentByteSize": 100,
+                }
+            ]
+        }
+
+        mcp = _server({"/api/search/semantic": lambda r: httpx.Response(200, json=payload)})
+        async with Client(mcp) as client:
+            result = await client.call_tool(
+                "semantic_search", {"query": "кир", "data_sources": ["repo"]},
+            )
+
+        text = _text(result)
+        assert text == json.dumps(json.loads(text), separators=(",", ":"), ensure_ascii=False)
+        assert "база/file.md" in text
+        assert "\\u04" not in text
+
 
 # ---------------------------------------------------------------------------
 # grep_search
@@ -699,6 +744,32 @@ async def test_404_includes_recovery_hint(self):
         assert result.is_error
         assert "get_data_sources" in _text(result)
 
+    @pytest.mark.asyncio
+    async def test_unicode_preserved_in_response(self):
+        """Cyrillic in path/lineText must survive as UTF-8, not \\uXXXX."""
+        payload = {
+            "results": [
+                {
+                    "kind": "File",
+                    "identifier": "org/repo::база/file.md::",
+                    "location": {"path": "база/file.md"},
+                    "matchCount": 1,
+                    "matches": [{"lineNumber": 3, "startColumn": 1, "endColumn": 5,
+                                 "lineText": "тест кириллица"}],
+                }
+            ]
+        }
+        mcp = _server({"/api/search/grep": lambda r: httpx.Response(200, json=payload)})
+        async with Client(mcp) as client:
+            result = await client.call_tool(
+                "grep_search", {"query": "кир", "data_sources": ["repo"]},
+            )
+
+        text = _text(result)
+        assert text == json.dumps(json.loads(text), separators=(",", ":"), ensure_ascii=False)
+        assert "тест кириллица" in text
+        assert "\\u04" not in text
+
 
 # ---------------------------------------------------------------------------
 # fetch_artifacts
@@ -1037,8 +1108,9 @@ def handler(req):
             )
 
         text = _text(result)
-        assert ", " not in text and ": " not in text
         data = json.loads(text)
+        # FastMCP serializes via pydantic_core.to_json — compact, UTF-8.
+        assert text == json.dumps(data, separators=(",", ":"), ensure_ascii=False)
         assert data["found"] is True
         types = [g["type"] for g in data["relationships"]]
         assert "outgoing_calls" in types
@@ -1221,6 +1293,40 @@ def handler(req):
         assert data["relationships"][0]["type"] == "references"
         assert data["relationships"][0]["totalCount"] == 5
 
+    @pytest.mark.asyncio
+    async def test_unicode_preserved_in_response(self):
+        """Cyrillic in identifiers/summaries must survive as UTF-8, not \\uXXXX."""
+        response_data = {
+            "sourceIdentifier": "org/repo::файл.cs::Класс.Метод",
+            "profile": "CallsOnly",
+            "found": True,
+            "relationships": [
+                {
+                    "relationType": "OutgoingCalls",
+                    "totalCount": 1,
+                    "returnedCount": 1,
+                    "truncated": False,
+                    "items": [{"identifier": "org/repo::другой.cs::Метод2",
+                               "filePath": "другой.cs",
+                               "shortSummary": "Описание метода"}],
+                }
+            ],
+        }
+        mcp = _server({
+            "/api/search/artifact-relationships": lambda r: httpx.Response(200, json=response_data),
+        })
+        async with Client(mcp) as client:
+            result = await client.call_tool(
+                "get_artifact_relationships",
+                {"identifier": "org/repo::файл.cs::Класс.Метод"},
+            )
+
+        text = _text(result)
+        assert text == json.dumps(json.loads(text), separators=(",", ":"), ensure_ascii=False)
+        assert "Класс.Метод" in text
+        assert "Описание метода" in text
+        assert "\\u04" not in text
+
     @pytest.mark.asyncio
     async def test_inheritance_profile_maps_correctly(self):
         response_data = {