Improve MCP relationship guidance and tool logging

rodion-m · rodion-m · commit 92cc94c24e35 · 2026-05-04T23:45:08.000+05:00
diff --git a/src/codealive_mcp_server.py b/src/codealive_mcp_server.py
@@ -80,6 +80,9 @@
     - Use specific function/class names or file path scopes when looking for particular implementations
     - Treat `semantic_search` and `grep_search` as the default discovery tools
     - Prefer `semantic_search` over the deprecated `codebase_search` legacy alias
+    - Use `get_artifact_relationships` only with exact artifact identifiers from prior search/fetch results.
+      It expands a known artifact's relationship graph; it does not search by path, class name, or guessed symbol.
+      For exact source code, call `fetch_artifacts` on identifiers returned by search or relationships.
     - Remember that context from previous messages is maintained in the same conversation
 
     Flexible data source usage:
diff --git a/src/middleware/observability_middleware.py b/src/middleware/observability_middleware.py
@@ -11,7 +11,7 @@
 execution carries the correlation ID.
 """
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 from loguru import logger
 from opentelemetry import trace
@@ -25,11 +25,43 @@
 _tracer = trace.get_tracer("codealive-mcp.tools")
 
 
+def _extract_tool_arguments(context: "MiddlewareContext") -> dict[str, Any]:
+    """Best-effort extraction of raw MCP tool arguments from FastMCP middleware context."""
+    message = getattr(context, "message", None)
+    args = getattr(message, "arguments", None)
+    if isinstance(args, dict):
+        return dict(args)
+
+    params = getattr(message, "params", None)
+    if isinstance(params, dict):
+        args = params.get("arguments")
+        if isinstance(args, dict):
+            return dict(args)
+    else:
+        args = getattr(params, "arguments", None)
+        if isinstance(args, dict):
+            return dict(args)
+
+    if isinstance(message, dict):
+        args = message.get("arguments")
+        if isinstance(args, dict):
+            return dict(args)
+
+        params = message.get("params")
+        if isinstance(params, dict):
+            args = params.get("arguments")
+            if isinstance(args, dict):
+                return dict(args)
+
+    return {}
+
+
 class ObservabilityMiddleware(Middleware):
     """Wrap each ``tools/call`` in an OTel span and log its outcome."""
 
     async def on_call_tool(self, context: "MiddlewareContext", call_next: "CallNext"):
         tool_name = getattr(context.message, "name", "unknown")
+        tool_arguments = _extract_tool_arguments(context)
 
         with _tracer.start_as_current_span(
             f"tool {tool_name}",
@@ -44,21 +76,25 @@ async def on_call_tool(self, context: "MiddlewareContext", call_next: "CallNext"
             span_ctx = span.get_span_context()
             trace_id = format(span_ctx.trace_id, "032x") if span_ctx.trace_id else ""
 
-            with logger.contextualize(trace_id=trace_id, tool=tool_name):
-                logger.info("Tool call started: {tool}", tool=tool_name)
+            with logger.contextualize(
+                trace_id=trace_id,
+                tool=tool_name,
+                tool_arguments=tool_arguments,
+            ):
+                logger.debug("Tool call started: {tool}", tool=tool_name)
 
                 try:
                     result = await call_next(context)
                 except Exception as exc:
                     span.set_status(StatusCode.ERROR, str(exc))
                     span.record_exception(exc)
-                    logger.error(
-                        "Tool call failed: {tool} — {error}",
-                        tool=tool_name,
+                    logger.bind(
+                        error_type=type(exc).__name__,
                         error=str(exc),
-                    )
+                        tool_arguments=tool_arguments,
+                    ).opt(exception=exc).warning("Tool call failed: {tool}", tool=tool_name)
                     raise
 
                 span.set_status(StatusCode.OK)
-                logger.info("Tool call completed: {tool}", tool=tool_name)
+                logger.debug("Tool call completed: {tool}", tool=tool_name)
                 return result
diff --git a/src/tests/test_artifact_relationships.py b/src/tests/test_artifact_relationships.py
@@ -208,7 +208,7 @@ async def test_default_profile_sends_calls_only(self, mock_get_api_key):
         mock_get_api_key.return_value = "test_key"
 
         ctx = MagicMock(spec=Context)
-        ctx.info = AsyncMock()
+        ctx.debug = AsyncMock()
         ctx.error = AsyncMock()
 
         mock_response = MagicMock()
@@ -244,7 +244,7 @@ async def test_explicit_profile_maps_correctly(self, mock_get_api_key):
         mock_get_api_key.return_value = "test_key"
 
         ctx = MagicMock(spec=Context)
-        ctx.info = AsyncMock()
+        ctx.debug = AsyncMock()
         ctx.error = AsyncMock()
 
         mock_response = MagicMock()
@@ -295,7 +295,7 @@ async def test_api_error_returns_error_json(self, mock_get_api_key):
         mock_get_api_key.return_value = "test_key"
 
         ctx = MagicMock(spec=Context)
-        ctx.info = AsyncMock()
+        ctx.debug = AsyncMock()
         ctx.error = AsyncMock()
 
         mock_response = MagicMock()
@@ -322,7 +322,7 @@ async def test_not_found_response_renders_correctly(self, mock_get_api_key):
         mock_get_api_key.return_value = "test_key"
 
         ctx = MagicMock(spec=Context)
-        ctx.info = AsyncMock()
+        ctx.debug = AsyncMock()
         ctx.error = AsyncMock()
 
         mock_response = MagicMock()
diff --git a/src/tests/test_e2e_tools.py b/src/tests/test_e2e_tools.py
@@ -15,10 +15,12 @@
 import httpx
 import pytest
 from fastmcp import Client, FastMCP
+from loguru import logger
 
 sys.path.insert(0, str(Path(__file__).parent.parent))
 
 from core import CodeAliveContext
+from middleware.observability_middleware import ObservabilityMiddleware
 from tools import (
     chat,
     codebase_consultant,
@@ -1204,6 +1206,38 @@ async def test_invalid_profile_returns_error(self):
         assert "callsOnly" in text
         assert "literal_error" in text or "Input should be" in text
 
+    @pytest.mark.asyncio
+    async def test_invalid_profile_is_logged_with_arguments_by_middleware(self):
+        """FastMCP validation fails before the tool body, so middleware must capture args."""
+        mcp = _server({})
+        mcp.add_middleware(ObservabilityMiddleware())
+        records = []
+        handler_id = logger.add(lambda message: records.append(message.record), level="DEBUG")
+
+        try:
+            async with Client(mcp) as client:
+                result = await client.call_tool(
+                    "get_artifact_relationships",
+                    {"identifier": "org/repo::x", "profile": "bogus"},
+                    raise_on_error=False,
+                )
+        finally:
+            logger.remove(handler_id)
+
+        assert result.is_error
+        failures = [
+            record for record in records
+            if record["message"] == "Tool call failed: get_artifact_relationships"
+        ]
+        assert len(failures) == 1
+        failure = failures[0]
+        assert failure["level"].name == "WARNING"
+        assert failure["extra"]["tool_arguments"] == {
+            "identifier": "org/repo::x",
+            "profile": "bogus",
+        }
+        assert failure["extra"]["error_type"] == "ValidationError"
+
     @pytest.mark.asyncio
     async def test_empty_identifier_returns_error(self):
         mcp = _server({})
diff --git a/src/tests/test_observability_middleware.py b/src/tests/test_observability_middleware.py
@@ -5,13 +5,14 @@
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
+from loguru import logger
 from opentelemetry import trace
 from opentelemetry.sdk.trace import TracerProvider, ReadableSpan
 from opentelemetry.sdk.trace.export import SimpleSpanProcessor, SpanExporter, SpanExportResult
 
 sys.path.insert(0, str(__import__("pathlib").Path(__file__).parent.parent))
 
-from middleware.observability_middleware import ObservabilityMiddleware
+from middleware.observability_middleware import ObservabilityMiddleware, _extract_tool_arguments
 
 
 class _CollectingExporter(SpanExporter):
@@ -49,9 +50,10 @@ def otel_setup():
     provider.shutdown()
 
 
-def _make_context(tool_name: str = "codebase_search"):
+def _make_context(tool_name: str = "codebase_search", arguments: dict | None = None):
     ctx = MagicMock()
     ctx.message.name = tool_name
+    ctx.message.arguments = arguments or {}
     return ctx
 
 
@@ -113,6 +115,28 @@ async def test_handles_missing_tool_name(self, otel_setup):
         assert span.name == "tool unknown"
         assert span.attributes["mcp.tool.name"] == "unknown"
 
+    @pytest.mark.asyncio
+    async def test_lifecycle_logs_are_debug_with_tool_arguments(self, otel_setup):
+        middleware = ObservabilityMiddleware()
+        tool_arguments = {"identifier": "org/repo::src/svc.py::run", "profile": "callsOnly"}
+        context = _make_context("get_artifact_relationships", tool_arguments)
+        call_next = AsyncMock(return_value="ok")
+        records = []
+        handler_id = logger.add(lambda message: records.append(message.record), level="DEBUG")
+
+        try:
+            await middleware.on_call_tool(context, call_next)
+        finally:
+            logger.remove(handler_id)
+
+        lifecycle = [
+            record for record in records
+            if record["message"].startswith("Tool call ")
+        ]
+        assert [record["level"].name for record in lifecycle] == ["DEBUG", "DEBUG"]
+        assert lifecycle[0]["extra"]["tool_arguments"] == tool_arguments
+        assert lifecycle[1]["extra"]["tool_arguments"] == tool_arguments
+
 
 # ---------------------------------------------------------------------------
 # Failed tool call
@@ -158,3 +182,47 @@ async def test_span_records_exception_event(self, otel_setup):
         messages = [e.attributes["exception.message"] for e in exception_events]
         assert "RuntimeError" in types
         assert "boom" in messages
+
+    @pytest.mark.asyncio
+    async def test_failure_logs_warning_with_full_tool_arguments(self, otel_setup):
+        middleware = ObservabilityMiddleware()
+        tool_arguments = {
+            "identifier": "org/repo::src/svc.py::run",
+            "profile": "bogus",
+            "max_count_per_type": 50,
+        }
+        context = _make_context("get_artifact_relationships", tool_arguments)
+        call_next = AsyncMock(side_effect=ValueError("bad profile"))
+        records = []
+        handler_id = logger.add(lambda message: records.append(message.record), level="DEBUG")
+
+        try:
+            with pytest.raises(ValueError, match="bad profile"):
+                await middleware.on_call_tool(context, call_next)
+        finally:
+            logger.remove(handler_id)
+
+        failures = [record for record in records if record["message"] == "Tool call failed: get_artifact_relationships"]
+        assert len(failures) == 1
+        failure = failures[0]
+        assert failure["level"].name == "WARNING"
+        assert failure["extra"]["tool"] == "get_artifact_relationships"
+        assert failure["extra"]["tool_arguments"] == tool_arguments
+        assert failure["extra"]["error_type"] == "ValueError"
+        assert failure["extra"]["error"] == "bad profile"
+
+
+class TestExtractToolArguments:
+    def test_extracts_fastmcp_arguments(self):
+        context = _make_context("tool", {"name": "value"})
+        assert _extract_tool_arguments(context) == {"name": "value"}
+
+    def test_extracts_json_rpc_params_arguments(self):
+        context = MagicMock()
+        context.message = {"params": {"arguments": {"identifier": "id"}}}
+        assert _extract_tool_arguments(context) == {"identifier": "id"}
+
+    def test_returns_empty_dict_when_unavailable(self):
+        context = MagicMock()
+        context.message = object()
+        assert _extract_tool_arguments(context) == {}
diff --git a/src/tests/test_tool_metadata.py b/src/tests/test_tool_metadata.py
@@ -35,3 +35,9 @@ async def test_all_tools_are_marked_read_only_with_titles():
         assert tool.title == title
         assert tool.annotations is not None
         assert tool.annotations.readOnlyHint is True
+
+    relationships_description = actual["get_artifact_relationships"].description
+    assert relationships_description is not None
+    assert "exact artifact identifier" in relationships_description
+    assert "not a search tool" in relationships_description
+    assert "fetch_artifacts" in relationships_description
diff --git a/src/tools/artifact_relationships.py b/src/tools/artifact_relationships.py
@@ -6,6 +6,7 @@
 import httpx
 from fastmcp import Context
 from fastmcp.exceptions import ToolError
+from loguru import logger
 
 from core import CodeAliveContext, get_api_key_from_context, log_api_request, log_api_response
 from utils import handle_api_error
@@ -40,10 +41,29 @@ async def get_artifact_relationships(
     """
     Retrieve relationship groups for a single artifact by profile.
 
-    Use this tool to explore an artifact's call graph, inheritance hierarchy,
-    or references. This is a drill-down tool — use it AFTER `semantic_search`,
-    `grep_search`, legacy `codebase_search`, or `fetch_artifacts` when you need
-    to understand how an artifact relates to others in the codebase.
+    Use this tool to expand the relationship graph around one known artifact:
+    call graph edges, inheritance hierarchy, or references.
+
+    Important usage rules:
+        - This is a graph expansion tool, not a search tool. The `identifier`
+          must be an exact artifact identifier returned by `semantic_search`,
+          `grep_search`, legacy `codebase_search`, or `fetch_artifacts`.
+        - Do not pass a repository name, file path, class name, method name, or
+          guessed symbol name unless it is the full identifier from a prior
+          tool result.
+        - If `found=false` or the backend returns a not-found/inaccessible
+          error, get a fresh identifier with `semantic_search`, `grep_search`,
+          `codebase_search`, or `fetch_artifacts` before retrying. Repeating
+          the same guessed identifier usually repeats the same failure.
+        - Relationships are primarily available for symbol artifacts such as
+          functions, methods, classes, and interfaces. Plain files and prose
+          documents can legitimately have no relationship graph.
+        - The response contains relationship metadata and short summaries, not
+          full source code. Use `fetch_artifacts` on returned identifiers when
+          exact source content is needed.
+        - If any relationship group has `truncated=true`, increase
+          `max_count_per_type` up to 1000 or narrow the investigation with a
+          more specific `profile`.
 
     Args:
         identifier: Fully qualified artifact identifier from search or fetch results.
@@ -68,17 +88,32 @@ async def get_artifact_relationships(
         When the artifact is not found or inaccessible:
             {"sourceIdentifier":"...","profile":"callsOnly","found":false}
     """
+    tool_arguments = {
+        "identifier": identifier,
+        "profile": profile,
+        "max_count_per_type": max_count_per_type,
+    }
+
     if not identifier:
+        logger.bind(tool=_TOOL_NAME, tool_arguments=tool_arguments).warning(
+            "Tool validation failed: artifact identifier is required"
+        )
         raise ToolError(f"[{_TOOL_NAME}] Artifact identifier is required.")
 
     if not (1 <= max_count_per_type <= 1000):
+        logger.bind(tool=_TOOL_NAME, tool_arguments=tool_arguments).warning(
+            "Tool validation failed: max_count_per_type is out of range"
+        )
         raise ToolError(f"[{_TOOL_NAME}] max_count_per_type must be between 1 and 1000.")
 
     # Literal type handles most validation via Pydantic, but direct callers
     # (e.g. unit tests) can still pass invalid values — keep as fallback.
     api_profile = PROFILE_MAP.get(profile)
     if api_profile is None:
         supported = ", ".join(PROFILE_MAP.keys())
+        logger.bind(tool=_TOOL_NAME, tool_arguments=tool_arguments).warning(
+            "Tool validation failed: unsupported relationship profile"
+        )
         raise ToolError(f'[{_TOOL_NAME}] Unsupported profile "{profile}". Use one of: {supported}')
 
     context: CodeAliveContext = ctx.request_context.lifespan_context
@@ -98,7 +133,7 @@ async def get_artifact_relationships(
             "maxCountPerType": max_count_per_type,
         }
 
-        await ctx.info(f"Fetching {profile} relationships for artifact")
+        await ctx.debug(f"Fetching {profile} relationships for artifact")
 
         full_url = urljoin(context.base_url, "/api/search/artifact-relationships")
         request_id = log_api_request("POST", full_url, headers, body=body)
@@ -113,6 +148,12 @@ async def get_artifact_relationships(
         return _build_relationships_dict(response.json())
 
     except (httpx.HTTPStatusError, Exception) as e:
+        logger.bind(
+            tool=_TOOL_NAME,
+            tool_arguments=tool_arguments,
+            error_type=type(e).__name__,
+            error=str(e),
+        ).warning("Tool call failed while fetching artifact relationships")
         await handle_api_error(
             ctx, e, "get artifact relationships", method=_TOOL_NAME,
             recovery_hints={