Skip to content

Commit df2a7b3

Browse files
committed
fix: validate MCP image payload blocks
Signed-off-by: Nabin Mulepati <nmulepati@nvidia.com>
1 parent 362f139 commit df2a7b3

3 files changed

Lines changed: 77 additions & 16 deletions

File tree

architecture/mcp.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,10 @@ Scoped to one `ToolConfig`. Provides the interface that `ModelFacade` uses:
3434
- **`process_completion_response`** — extracts tool calls from a completion, executes them in parallel via `MCPIOService`, returns `ChatMessage` list with results
3535
- **`refuse_completion_response`** — handles tool-call turn limits (prevents infinite tool loops)
3636

37-
Tool result messages may contain either text or ordered multimodal content blocks. MCP image results and explicit
38-
base64 image payloads are preserved as canonical `image_url` data URI blocks and translated by provider adapters at
39-
the API boundary. Models need VLM-capable provider support to interpret those image results semantically.
37+
Tool result messages may contain either text or ordered multimodal content blocks. MCP image results, and generic
38+
base64 payloads with `image/*` MIME metadata or image data URI prefixes, are preserved as canonical `image_url` data
39+
URI blocks and translated by provider adapters at the API boundary. Models need VLM-capable provider support to
40+
interpret those image results semantically.
4041

4142
### MCPRegistry
4243

packages/data-designer-engine/src/data_designer/engine/mcp/io.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -552,12 +552,20 @@ def _has_base64_image_payload(item: Any) -> bool:
552552

553553
def _coerce_image_url_block(block: dict[str, Any]) -> dict[str, Any]:
554554
image_url = block.get("image_url")
555-
if not isinstance(image_url, dict):
556-
return block
555+
if isinstance(image_url, str):
556+
image_url = {"url": image_url}
557+
elif not isinstance(image_url, dict):
558+
raise MCPToolError("MCP image_url block must contain an image_url dict or string.")
557559

558560
url = image_url.get("url")
559-
if not isinstance(url, str) or url.startswith(("data:image/", "http://", "https://")):
560-
return block
561+
if not isinstance(url, str) or not url:
562+
raise MCPToolError("MCP image_url block must contain a non-empty string URL.")
563+
if url.startswith(("http://", "https://")):
564+
return {"type": "image_url", "image_url": image_url}
565+
if url.startswith("data:"):
566+
_extract_mime_type_from_data_uri(url)
567+
_coerce_base64_image_data(url)
568+
return {"type": "image_url", "image_url": image_url}
561569

562570
return _build_image_url_block({"base64": url})
563571

@@ -594,7 +602,9 @@ def _coerce_image_mime_type(data: str, mime_type: Any) -> str:
594602

595603
def _coerce_base64_image_data(data: str) -> str:
596604
try:
597-
return extract_base64_from_data_uri(data)
605+
base64_data = extract_base64_from_data_uri(data)
606+
decode_base64_image(base64_data)
607+
return base64_data
598608
except ValueError as exc:
599609
raise MCPToolError("MCP image content has invalid base64 data.") from exc
600610

packages/data-designer-engine/tests/engine/mcp/test_mcp_io.py

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -227,10 +227,10 @@ def test_coerce_content_base64_payload_dict_with_media_type() -> None:
227227
"""Explicit media_type payloads do not need image format detection."""
228228

229229
class FakeResult:
230-
content = [{"base64": "abc123", "media_type": "image/webp"}]
230+
content = [{"base64": "YWJjMTIz", "media_type": "image/webp"}]
231231

232232
assert mcp_io._coerce_tool_result_content(FakeResult()) == [
233-
{"type": "image_url", "image_url": {"url": "data:image/webp;base64,abc123"}}
233+
{"type": "image_url", "image_url": {"url": "data:image/webp;base64,YWJjMTIz"}}
234234
]
235235

236236

@@ -278,6 +278,17 @@ class FakeResult:
278278
assert mcp_io._coerce_tool_result_content(FakeResult()) == [{"type": "text", "text": "before"}, image_block]
279279

280280

281+
def test_coerce_content_normalizes_image_url_string_shorthand() -> None:
282+
"""Common shorthand image_url strings are normalized to canonical blocks."""
283+
284+
class FakeResult:
285+
content = [{"type": "image_url", "image_url": "data:image/png;base64,iVBORw0KGgo="}]
286+
287+
assert mcp_io._coerce_tool_result_content(FakeResult()) == [
288+
{"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo="}}
289+
]
290+
291+
281292
def test_coerce_content_normalizes_image_url_raw_base64() -> None:
282293
"""Non-canonical image_url blocks with raw base64 are normalized to data URIs."""
283294

@@ -289,6 +300,35 @@ class FakeResult:
289300
]
290301

291302

303+
@pytest.mark.parametrize(
304+
"image_url",
305+
[
306+
pytest.param(None, id="missing"),
307+
pytest.param(123, id="non-dict"),
308+
pytest.param({}, id="missing-url"),
309+
pytest.param({"url": 123}, id="non-string-url"),
310+
],
311+
)
312+
def test_coerce_content_rejects_malformed_image_url_blocks(image_url: object) -> None:
313+
"""MCP coercion enforces canonical image_url block shape."""
314+
315+
class FakeResult:
316+
content = [{"type": "image_url", "image_url": image_url}]
317+
318+
with pytest.raises(MCPToolError, match="image_url block"):
319+
mcp_io._coerce_tool_result_content(FakeResult())
320+
321+
322+
def test_coerce_content_rejects_image_url_invalid_base64_data_uri() -> None:
323+
"""Canonical data URI image_url blocks still need valid base64 payloads."""
324+
325+
class FakeResult:
326+
content = [{"type": "image_url", "image_url": {"url": "data:image/png;base64,not-base64!!!"}}]
327+
328+
with pytest.raises(MCPToolError, match="invalid base64"):
329+
mcp_io._coerce_tool_result_content(FakeResult())
330+
331+
292332
def test_coerce_content_bare_image_object() -> None:
293333
"""Test coercing a bare MCP-like image object."""
294334

@@ -307,7 +347,7 @@ def test_coerce_content_mixed_text_image_preserves_order() -> None:
307347

308348
class ImageItem:
309349
type = "image"
310-
data = "abc123"
350+
data = "YWJjMTIz"
311351
mimeType = "image/jpeg"
312352

313353
class TextItem:
@@ -319,14 +359,14 @@ class FakeResult:
319359
{"type": "text", "text": "before"},
320360
ImageItem(),
321361
TextItem(),
322-
{"type": "image", "data": "def456", "mime_type": "image/webp"},
362+
{"type": "image", "data": "ZGVmNDU2", "mime_type": "image/webp"},
323363
]
324364

325365
assert mcp_io._coerce_tool_result_content(FakeResult()) == [
326366
{"type": "text", "text": "before"},
327-
{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,abc123"}},
367+
{"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,YWJjMTIz"}},
328368
{"type": "text", "text": "after"},
329-
{"type": "image_url", "image_url": {"url": "data:image/webp;base64,def456"}},
369+
{"type": "image_url", "image_url": {"url": "data:image/webp;base64,ZGVmNDU2"}},
330370
]
331371

332372

@@ -336,13 +376,13 @@ def test_coerce_content_real_mcp_text_and_image_objects() -> None:
336376
class FakeResult:
337377
content = [
338378
TextContent(type="text", text="before"),
339-
ImageContent(type="image", data="abc123", mimeType="image/png"),
379+
ImageContent(type="image", data="YWJjMTIz", mimeType="image/png"),
340380
TextContent(type="text", text="after"),
341381
]
342382

343383
assert mcp_io._coerce_tool_result_content(FakeResult()) == [
344384
{"type": "text", "text": "before"},
345-
{"type": "image_url", "image_url": {"url": "data:image/png;base64,abc123"}},
385+
{"type": "image_url", "image_url": {"url": "data:image/png;base64,YWJjMTIz"}},
346386
{"type": "text", "text": "after"},
347387
]
348388

@@ -357,6 +397,16 @@ class FakeResult:
357397
mcp_io._coerce_tool_result_content(FakeResult())
358398

359399

400+
def test_coerce_content_image_with_invalid_base64_fails_clearly() -> None:
401+
"""Explicit MCP image content must contain valid base64 data."""
402+
403+
class FakeResult:
404+
content = [{"type": "image", "data": "not-base64!!!", "mimeType": "image/png"}]
405+
406+
with pytest.raises(MCPToolError, match="invalid base64"):
407+
mcp_io._coerce_tool_result_content(FakeResult())
408+
409+
360410
def test_coerce_content_image_with_non_image_mime_type_fails_clearly() -> None:
361411
"""Explicit MCP image content must not produce non-image image_url data URIs."""
362412

0 commit comments

Comments
 (0)