Skip to content

Commit d7ea07c

Browse files
authored
Merge branch 'main' into nmulepati/fix-590-remove-implicit-default-provider
2 parents 8177921 + a83968f commit d7ea07c

10 files changed

Lines changed: 607 additions & 47 deletions

File tree

architecture/mcp.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ Scoped to one `ToolConfig`. Provides the interface that `ModelFacade` uses:
3434
- **`process_completion_response`** — extracts tool calls from a completion, executes them in parallel via `MCPIOService`, returns `ChatMessage` list with results
3535
- **`refuse_completion_response`** — handles tool-call turn limits (prevents infinite tool loops)
3636

37+
Tool result messages may contain either text or ordered multimodal content blocks. MCP image results, and generic
38+
base64 payloads with `image/*` MIME metadata or image data URI prefixes, are preserved as canonical `image_url` data
39+
URI blocks and translated by provider adapters at the API boundary. Models need VLM-capable provider support to
40+
interpret those image results semantically.
41+
3742
### MCPRegistry
3843

3944
Maps `tool_alias``ToolConfig`. Lazy `MCPFacade` construction mirrors `ModelRegistry`. Provides health checks for configured tools.

packages/data-designer-engine/src/data_designer/engine/mcp/io.py

Lines changed: 191 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,9 @@
3232
import atexit
3333
import json
3434
import logging
35+
import re
3536
import threading
36-
from collections.abc import Coroutine, Iterable
37+
from collections.abc import Callable, Coroutine, Iterable
3738
from typing import Any
3839

3940
from mcp import ClientSession, StdioServerParameters
@@ -42,10 +43,16 @@
4243
from mcp.client.streamable_http import streamablehttp_client
4344

4445
from data_designer.config.mcp import LocalStdioMCPProvider, MCPProvider, MCPProviderT
46+
from data_designer.config.utils.image_helpers import (
47+
decode_base64_image,
48+
detect_image_format,
49+
extract_base64_from_data_uri,
50+
)
4551
from data_designer.engine.mcp.errors import MCPToolError
4652
from data_designer.engine.mcp.registry import MCPToolDefinition, MCPToolResult
4753

4854
logger = logging.getLogger(__name__)
55+
_DATA_URI_MIME_TYPE_RE = re.compile(r"^data:(?P<mime_type>[^;]+);base64,")
4956

5057

5158
def _provider_cache_key(provider: MCPProviderT) -> str:
@@ -289,7 +296,7 @@ async def _call_tool_async(
289296
session = await self._get_or_create_session(provider)
290297
result = await session.call_tool(tool_name, arguments)
291298

292-
content = _serialize_tool_result_content(result)
299+
content = _coerce_tool_result_content(result)
293300
is_error = getattr(result, "isError", None)
294301
if is_error is None:
295302
is_error = getattr(result, "is_error", False)
@@ -467,31 +474,195 @@ def _coerce_tool_definition(tool: Any, tool_definition_cls: type[MCPToolDefiniti
467474
return tool_definition_cls(name=name, description=description, input_schema=input_schema)
468475

469476

470-
def _serialize_tool_result_content(result: Any) -> str:
471-
"""Serialize tool result content to a string."""
477+
def _coerce_tool_result_content(result: Any) -> str | list[dict[str, Any]]:
478+
"""Coerce MCP tool result content while preserving image blocks."""
472479
content = getattr(result, "content", result)
473480
if content is None:
474481
return ""
475482
if isinstance(content, str):
476483
return content
477484
if isinstance(content, dict):
485+
if _is_image_url_block(content):
486+
return [_coerce_image_url_block(content)]
487+
if _is_image_content(content) or _has_base64_image_payload(content):
488+
return [_build_image_url_block(content)]
489+
if _is_text_content(content):
490+
return str(content.get("text", ""))
478491
return json.dumps(content)
492+
if _is_image_content(content) or _has_base64_image_payload(content):
493+
return [_build_image_url_block(content)]
494+
if _is_text_content(content):
495+
return str(_get_content_field(content, "text", default=""))
479496
if isinstance(content, list):
480-
parts: list[str] = []
497+
blocks: list[dict[str, Any]] = []
498+
has_image = False
481499
for item in content:
482-
if isinstance(item, str):
483-
parts.append(item)
484-
continue
485-
if isinstance(item, dict):
486-
if item.get("type") == "text":
487-
parts.append(str(item.get("text", "")))
488-
else:
489-
parts.append(json.dumps(item))
490-
continue
491-
text_value = getattr(item, "text", None)
492-
if text_value is not None:
493-
parts.append(str(text_value))
494-
else:
495-
parts.append(str(item))
496-
return "\n".join(parts)
500+
block = _coerce_tool_result_content_item(item)
501+
blocks.append(block)
502+
has_image = has_image or block.get("type") == "image_url"
503+
if has_image:
504+
return blocks
505+
return "\n".join(block.get("text", "") for block in blocks)
497506
return str(content)
507+
508+
509+
def _coerce_tool_result_content_item(item: Any) -> dict[str, Any]:
510+
"""Coerce a single MCP content item to an internal ChatML-style block."""
511+
if isinstance(item, str):
512+
return _build_text_block(item)
513+
if _is_image_url_block(item):
514+
return _coerce_image_url_block(item)
515+
if _is_image_content(item) or _has_base64_image_payload(item):
516+
return _build_image_url_block(item)
517+
if _is_text_content(item):
518+
return _build_text_block(_get_content_field(item, "text", default=""))
519+
if isinstance(item, dict):
520+
return _build_text_block(json.dumps(item))
521+
522+
text_value = getattr(item, "text", None)
523+
if text_value is not None:
524+
return _build_text_block(text_value)
525+
return _build_text_block(item)
526+
527+
528+
def _is_text_content(item: Any) -> bool:
529+
return _get_content_field(item, "type") == "text"
530+
531+
532+
def _is_image_content(item: Any) -> bool:
533+
return _get_content_field(item, "type") == "image"
534+
535+
536+
def _is_image_url_block(item: Any) -> bool:
537+
return isinstance(item, dict) and item.get("type") == "image_url"
538+
539+
540+
def _has_base64_image_payload(item: Any) -> bool:
541+
data = _get_content_field(item, "data", "b64_json", "base64")
542+
if not isinstance(data, str) or not data:
543+
return False
544+
545+
mime_type = _get_content_field(item, "mimeType", "mime_type", "media_type")
546+
if isinstance(mime_type, str) and mime_type:
547+
return _is_image_mime_type(mime_type)
548+
549+
data_uri_mime_type = _extract_data_uri_mime_type(data)
550+
return data_uri_mime_type is not None and _is_image_mime_type(data_uri_mime_type)
551+
552+
553+
def _coerce_image_url_block(block: dict[str, Any]) -> dict[str, Any]:
554+
image_url = block.get("image_url")
555+
if isinstance(image_url, str):
556+
image_url = {"url": image_url}
557+
elif not isinstance(image_url, dict):
558+
raise MCPToolError("MCP image_url block must contain an image_url dict or string.")
559+
560+
url = image_url.get("url")
561+
if not isinstance(url, str) or not url:
562+
raise MCPToolError("MCP image_url block must contain a non-empty string URL.")
563+
if url.startswith(("http://", "https://")):
564+
return {"type": "image_url", "image_url": image_url}
565+
if url.startswith("data:"):
566+
_extract_mime_type_from_data_uri(url)
567+
_coerce_base64_image_data(url)
568+
return {"type": "image_url", "image_url": image_url}
569+
570+
return _build_image_url_block({"base64": url})
571+
572+
573+
def _build_image_url_block(item: Any) -> dict[str, Any]:
574+
data = _get_content_field(item, "data", "b64_json", "base64")
575+
mime_type = _get_content_field(item, "mimeType", "mime_type", "media_type")
576+
if not isinstance(data, str) or not data:
577+
raise MCPToolError("MCP image content is missing base64 data.")
578+
mime_type = _coerce_image_mime_type(data, mime_type)
579+
base64_data = _coerce_base64_image_data(data)
580+
581+
return {
582+
"type": "image_url",
583+
"image_url": {"url": f"data:{mime_type};base64,{base64_data}"},
584+
}
585+
586+
587+
def _coerce_image_mime_type(data: str, mime_type: Any) -> str:
588+
if isinstance(mime_type, str) and mime_type:
589+
if not _is_image_mime_type(mime_type):
590+
raise MCPToolError(f"MCP image content must use an image MIME type, got {mime_type!r}.")
591+
return mime_type
592+
593+
data_uri_mime_type = _extract_mime_type_from_data_uri(data)
594+
if data_uri_mime_type is not None:
595+
return data_uri_mime_type
596+
597+
try:
598+
return f"image/{detect_image_format(decode_base64_image(data)).value}"
599+
except ValueError as exc:
600+
raise MCPToolError("MCP image content is missing a MIME type.") from exc
601+
602+
603+
def _coerce_base64_image_data(data: str) -> str:
604+
try:
605+
base64_data = extract_base64_from_data_uri(data)
606+
decode_base64_image(base64_data)
607+
return base64_data
608+
except ValueError as exc:
609+
raise MCPToolError("MCP image content has invalid base64 data.") from exc
610+
611+
612+
def _extract_mime_type_from_data_uri(data: str) -> str | None:
613+
mime_type = _extract_data_uri_mime_type(data)
614+
if mime_type is None:
615+
return None
616+
if not _is_image_mime_type(mime_type):
617+
raise MCPToolError(f"MCP image content data URI must use an image MIME type, got {mime_type!r}.")
618+
return mime_type
619+
620+
621+
def _extract_data_uri_mime_type(data: str) -> str | None:
622+
match = _DATA_URI_MIME_TYPE_RE.match(data)
623+
if match is None:
624+
return None
625+
return match.group("mime_type")
626+
627+
628+
def _is_image_mime_type(mime_type: str) -> bool:
629+
return mime_type.lower().startswith("image/")
630+
631+
632+
def _get_content_field(item: Any, *names: str, default: Any = None) -> Any:
633+
if isinstance(item, dict):
634+
for name in names:
635+
if name in item:
636+
return item[name]
637+
return default
638+
639+
for name in names:
640+
if hasattr(item, name):
641+
return getattr(item, name)
642+
643+
model_dump = getattr(item, "model_dump", None)
644+
if callable(model_dump):
645+
return _get_content_field_from_dump(model_dump, names, default)
646+
647+
dict_dump = getattr(item, "dict", None)
648+
if callable(dict_dump):
649+
return _get_content_field_from_dump(dict_dump, names, default)
650+
651+
return default
652+
653+
654+
def _get_content_field_from_dump(dump_method: Callable[..., Any], names: tuple[str, ...], default: Any) -> Any:
655+
for kwargs in ({"by_alias": True}, {}):
656+
try:
657+
dumped = dump_method(**kwargs)
658+
except TypeError:
659+
continue
660+
if isinstance(dumped, dict):
661+
for name in names:
662+
if name in dumped:
663+
return dumped[name]
664+
return default
665+
666+
667+
def _build_text_block(value: Any) -> dict[str, Any]:
668+
return {"type": "text", "text": str(value)}

packages/data-designer-engine/src/data_designer/engine/mcp/registry.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ def to_openai_tool_schema(self) -> dict[str, Any]:
5050
class MCPToolResult:
5151
"""Result from executing an MCP tool call."""
5252

53-
content: str
53+
content: str | list[dict[str, Any]]
5454
is_error: bool = False
5555

5656

packages/data-designer-engine/src/data_designer/engine/models/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def as_system(cls, content: str) -> ChatMessage:
7878
return cls(role="system", content=content)
7979

8080
@classmethod
81-
def as_tool(cls, content: str, tool_call_id: str) -> ChatMessage:
81+
def as_tool(cls, content: str | list[dict[str, Any]], tool_call_id: str) -> ChatMessage:
8282
"""Create a tool response message."""
8383
return cls(role="tool", content=content, tool_call_id=tool_call_id)
8484

packages/data-designer-engine/tests/engine/mcp/test_mcp_facade.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,40 @@ def mock_call_tools(
122122
assert messages[1].tool_call_id == "call-1"
123123

124124

125+
def test_process_completion_preserves_multimodal_tool_result_content(
126+
monkeypatch: pytest.MonkeyPatch,
127+
stub_mcp_facade: MCPFacade,
128+
mock_completion_response_single_tool: ChatCompletionResponse,
129+
) -> None:
130+
"""Tool result messages can carry multimodal content blocks unchanged."""
131+
132+
multimodal_result = [
133+
{"type": "text", "text": "Screenshot:"},
134+
{"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo="}},
135+
{"type": "text", "text": "Use the chart title."},
136+
]
137+
138+
def mock_list_tools(provider: Any, timeout_sec: float | None = None) -> tuple[MCPToolDefinition, ...]:
139+
return (MCPToolDefinition(name="lookup", description="Lookup", input_schema={"type": "object"}),)
140+
141+
def mock_call_tools(
142+
calls: list[tuple[Any, str, dict[str, Any]]],
143+
*,
144+
timeout_sec: float | None = None,
145+
) -> list[MCPToolResult]:
146+
return [MCPToolResult(content=multimodal_result)]
147+
148+
monkeypatch.setattr(mcp_io, "list_tools", mock_list_tools)
149+
monkeypatch.setattr(mcp_io, "call_tools", mock_call_tools)
150+
151+
messages = stub_mcp_facade.process_completion_response(mock_completion_response_single_tool)
152+
153+
assert len(messages) == 2
154+
assert messages[1].role == "tool"
155+
assert messages[1].tool_call_id == "call-1"
156+
assert messages[1].content == multimodal_result
157+
158+
125159
def test_process_completion_preserves_content(
126160
stub_mcp_facade: MCPFacade,
127161
mock_completion_response_no_tools: ChatCompletionResponse,

0 commit comments

Comments
 (0)