feat(p8.2): MCP v3 sampling 模块 - server 借用 client LLM

ZhaoXingPeng · ZhaoXingPeng · commit befd894a0bf8 · 2026-06-01T16:42:00.000+08:00
新增 src/dbjavagenix/mcp_apps/sampling.py。

sampling 是 elicitation 的镜像 — 不是问用户,是问客户端的 LLM。
server 通过 sampling/createMessage 反向调 client,client 决定是否同意
(通常会请求用户授权),用它自己的 LLM 跑,把结果返回。

3 个核心:
- ModelPreferences: 软偏好 (intelligence / speed / cost priority + hints)
  序列化成 modelPreferences 字段
- build_sampling_request: 构造 sampling/createMessage payload
- SamplingClient: 注入 dispatcher 的薄适配器,服务端代码可以用
  `await client.complete(msg)` 像调 anthropic SDK 那样调

最实际用途: ai_infer_business_names 在没有 ANTHROPIC_API_KEY 的 CI 环境
也能跑 LLM 增强 — 通过 client 的 LLM 配额。

15 个 unit test 覆盖 ModelPreferences 序列化 / sampling request 构造 /
错误输入 / 同步+异步 dispatcher / 各种响应形态提取。

Why: ADR-004 (规则优先 LLM 可选) 当前要求 ANTHROPIC_API_KEY。sampling
让 LLM 路径在 CI/无 key 环境也能用,且把 LLM 成本和管控交给 client。
diff --git a/src/dbjavagenix/mcp_apps/sampling.py b/src/dbjavagenix/mcp_apps/sampling.py
@@ -0,0 +1,159 @@
+"""MCP v3 (2025-06-18 spec) sampling helper.
+
+`sampling` is the *inverse* of regular tool calls: the server asks the client
+to run an LLM inference on its behalf. The client decides whether to comply
+(may prompt the user for approval) and uses its own LLM/budget.
+
+Wire format:
+```
+{
+  "method": "sampling/createMessage",
+  "params": {
+    "messages": [...],
+    "systemPrompt": "...",
+    "modelPreferences": {...},
+    "maxTokens": 1024
+  }
+}
+```
+
+This module:
+1. Builds the request payload (`build_sampling_request`)
+2. Provides a thin Python adapter `SamplingClient` that mirrors a subset of
+   the anthropic SDK surface so server-side code can swap between direct API
+   calls and sampling with minimal changes.
+
+Use case in DBJavaGenix:
+- `ai_infer_business_names` would normally call Anthropic API directly
+  (requires ANTHROPIC_API_KEY).
+- With sampling, the same call goes to the client; CI/offline environments
+  without an API key can still get LLM-enhanced inference via the client
+  (which has its own model + budget).
+
+Compatibility (2026-06):
+- Claude Desktop 4.6+: full support
+- Claude Code 2.x: full support
+- Cherry Studio: not yet (planned)
+- Cursor / Continue.dev: not yet
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Any
+
+
+@dataclass
+class ModelPreferences:
+    """Advisory model-selection weights sent along with a sampling request.
+
+    Not binding on the client. `to_dict` emits camelCase keys, no empty hints.
+    """
+
+    intelligence_priority: float = 0.5
+    speed_priority: float = 0.5
+    cost_priority: float = 0.0
+    hints: list[str] = field(default_factory=list)
+
+    def to_dict(self) -> dict[str, Any]:
+        serialized: dict[str, Any] = dict(
+            intelligencePriority=self.intelligence_priority,
+            speedPriority=self.speed_priority,
+            costPriority=self.cost_priority,
+        )
+        if not self.hints:
+            return serialized
+        return {**serialized, "hints": [{"name": name} for name in self.hints]}
+
+
+def build_sampling_request(
+    user_message: str,
+    system_prompt: str | None = None,
+    max_tokens: int = 1024,
+    model_prefs: ModelPreferences | None = None,
+) -> dict[str, Any]:
+    """Assemble the `params` dict for an MCP sampling/createMessage call.
+
+    Args:
+        user_message: text the client's LLM should answer; must be non-empty
+        system_prompt: system instruction, left out when None or empty
+        max_tokens: output token cap, accepted range 1-8192
+        model_prefs: model selection hints, serialized via `to_dict()`
+
+    Returns:
+        The payload dict (only the `params` part, without `method`).
+
+    Raises:
+        ValueError: on a falsy user_message or an out-of-range max_tokens.
+    """
+    if not user_message:
+        raise ValueError("user_message must be non-empty")
+    if max_tokens > 8192 or max_tokens <= 0:
+        raise ValueError("max_tokens out of range (1-8192)")
+
+    text_block = {"type": "text", "text": user_message}
+    request: dict[str, Any] = {
+        "messages": [{"role": "user", "content": text_block}],
+        "maxTokens": max_tokens,
+    }
+    # Optional fields are emitted only for truthy arguments.
+    if system_prompt:
+        request["systemPrompt"] = system_prompt
+    if model_prefs:
+        request["modelPreferences"] = model_prefs.to_dict()
+    return request
+
+
+class SamplingClient:
+    """Adapter that sends sampling requests through an injected dispatcher.
+
+    The MCP server transport supplies the dispatcher, so server-side code can
+    `await client.complete(...)` without depending on a concrete transport.
+    """
+
+    def __init__(self, dispatcher):
+        """Keep `dispatcher` for later use by `complete`.
+
+        Args:
+            dispatcher: callable(payload_dict), sync or async, returning the
+                client's raw reply (None, a str, or a response dict).
+
+        Raises:
+            TypeError: if dispatcher is not callable.
+        """
+        if not callable(dispatcher):
+            raise TypeError("dispatcher must be callable")
+        self._dispatch = dispatcher
+
+    async def complete(
+        self,
+        user_message: str,
+        system_prompt: str | None = None,
+        max_tokens: int = 1024,
+        model_prefs: ModelPreferences | None = None,
+    ) -> str:
+        """Send a sampling request and return the text response."""
+        import inspect  # function-scope: this module does not import inspect
+        payload = build_sampling_request(
+            user_message, system_prompt, max_tokens, model_prefs
+        )
+        result = self._dispatch(payload)
+        # isawaitable() also covers Futures and generator-based coroutines.
+        return _extract_text(await result if inspect.isawaitable(result) else result)
+
+
+def _extract_text(response: Any) -> str:
+    """Return the reply text from a dispatcher response ("" for None).
+
+    For a dict, `content` may be a single block (the shape MCP
+    sampling/createMessage results use) or an anthropic-style list; the
+    first block of type "text" wins, then a top-level "text" key.
+    Anything else is returned as `str(response)`.
+    """
+    if isinstance(response, dict):
+        content = response.get("content")
+        for block in content if isinstance(content, list) else [content]:
+            if isinstance(block, dict) and block.get("type") == "text":
+                return str(block.get("text", ""))
+        return str(response["text"]) if "text" in response else str(response)
+    return "" if response is None else str(response)
diff --git a/tests/unit/test_sampling.py b/tests/unit/test_sampling.py
@@ -0,0 +1,113 @@
+"""Unit tests for mcp_apps.sampling."""
+import asyncio
+
+import pytest
+
+from dbjavagenix.mcp_apps.sampling import (
+    ModelPreferences,
+    SamplingClient,
+    build_sampling_request,
+)
+
+
+def _run(coro):  # run `coro` to completion on a fresh event loop
+    return asyncio.run(coro)  # get_event_loop() with no loop set is deprecated
+
+
+class TestModelPreferences:
+    """Serialization checks for ModelPreferences.to_dict()."""
+
+    def test_default_serialization(self):
+        serialized = ModelPreferences().to_dict()
+        assert serialized["intelligencePriority"] == 0.5
+        assert serialized["speedPriority"] == 0.5
+        assert "hints" not in serialized
+
+    def test_hints_serialized_as_name_objects(self):
+        prefs = ModelPreferences(hints=["claude-sonnet-4-6"])
+        assert prefs.to_dict()["hints"] == [{"name": "claude-sonnet-4-6"}]
+
+    def test_custom_priorities(self):
+        prefs = ModelPreferences(intelligence_priority=0.9, cost_priority=0.1)
+        serialized = prefs.to_dict()
+        assert serialized["intelligencePriority"] == 0.9
+        assert serialized["costPriority"] == 0.1
+
+
+class TestBuildSamplingRequest:
+    """Payload shape and argument validation of build_sampling_request."""
+
+    def test_minimal(self):
+        req = build_sampling_request("infer names for sys_user")
+        assert req["maxTokens"] == 1024
+        assert [msg["role"] for msg in req["messages"]] == ["user"]
+        assert not req.keys() & {"systemPrompt", "modelPreferences"}
+
+    def test_with_system_prompt(self):
+        req = build_sampling_request("hello", system_prompt="You are an expert.")
+        assert req["systemPrompt"] == "You are an expert."
+
+    def test_with_model_prefs(self):
+        prefs = ModelPreferences(intelligence_priority=0.9)
+        req = build_sampling_request("hello", model_prefs=prefs)
+        assert req["modelPreferences"]["intelligencePriority"] == 0.9
+
+    def test_empty_message_raises(self):
+        with pytest.raises(ValueError):
+            build_sampling_request("")
+
+    def test_invalid_max_tokens(self):
+        for out_of_range in (0, 10000):
+            with pytest.raises(ValueError):
+                build_sampling_request("hi", max_tokens=out_of_range)
+
+
+class TestSamplingClient:
+    """SamplingClient.complete() with sync/async dispatchers and reply shapes."""
+
+    def test_sync_dispatcher(self):
+        seen = []
+
+        def dispatcher(payload):
+            seen.append(payload)
+            return {"content": [{"type": "text", "text": "ok"}]}
+
+        reply = _run(SamplingClient(dispatcher).complete("ping"))
+        assert reply == "ok"
+        assert seen[0]["messages"][0]["content"]["text"] == "ping"
+
+    def test_async_dispatcher(self):
+        async def dispatcher(payload):
+            return {"content": [{"type": "text", "text": "async-ok"}]}
+
+        reply = _run(SamplingClient(dispatcher).complete("ping"))
+        assert reply == "async-ok"
+
+    def test_non_callable_raises(self):
+        with pytest.raises(TypeError):
+            SamplingClient("not a function")
+
+    def test_extracts_text_from_string_response(self):
+        client = SamplingClient(lambda _payload: "plain string")
+        reply = _run(client.complete("ping"))
+        assert reply == "plain string"
+
+    def test_extracts_text_from_text_key(self):
+        client = SamplingClient(lambda _payload: {"text": "from-text-key"})
+        reply = _run(client.complete("ping"))
+        assert reply == "from-text-key"
+
+    def test_none_response_returns_empty(self):
+        client = SamplingClient(lambda _payload: None)
+        reply = _run(client.complete("ping"))
+        assert reply == ""
+
+    def test_propagates_system_prompt(self):
+        seen = []
+
+        def dispatcher(payload):
+            seen.append(payload)
+            return {"text": "ok"}
+
+        _run(SamplingClient(dispatcher).complete("ping", system_prompt="sys"))
+        assert seen[0]["systemPrompt"] == "sys"