Skip to content

Commit 5277c60

Browse files
fix(integrations): pydantic-ai: properly format binary input message parts to be conformant with the gen_ai.request.messages structure (#5251)
#### Issues Closes https://linear.app/getsentry/issue/TET-1634/redact-images-pydantic-ai
1 parent ead491d commit 5277c60

File tree

4 files changed

+183
-5
lines changed

4 files changed

+183
-5
lines changed

sentry_sdk/integrations/pydantic_ai/spans/ai_client.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
import sentry_sdk
2-
from sentry_sdk.ai.utils import set_data_normalized
2+
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
3+
from sentry_sdk.ai.utils import (
4+
normalize_message_roles,
5+
set_data_normalized,
6+
truncate_and_annotate_messages,
7+
get_modality_from_mime_type,
8+
)
39
from sentry_sdk.consts import OP, SPANDATA
410
from sentry_sdk.utils import safe_serialize
511

@@ -29,6 +35,7 @@
2935
UserPromptPart,
3036
TextPart,
3137
ThinkingPart,
38+
BinaryContent,
3239
)
3340
except ImportError:
3441
# Fallback if these classes are not available
@@ -38,6 +45,7 @@
3845
UserPromptPart = None
3946
TextPart = None
4047
ThinkingPart = None
48+
BinaryContent = None
4149

4250

4351
def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> None:
@@ -107,6 +115,17 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non
107115
for item in part.content:
108116
if isinstance(item, str):
109117
content.append({"type": "text", "text": item})
118+
elif BinaryContent and isinstance(item, BinaryContent):
119+
content.append(
120+
{
121+
"type": "blob",
122+
"modality": get_modality_from_mime_type(
123+
item.media_type
124+
),
125+
"mime_type": item.media_type,
126+
"content": BLOB_DATA_SUBSTITUTE,
127+
}
128+
)
110129
else:
111130
content.append(safe_serialize(item))
112131
else:
@@ -124,8 +143,13 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non
124143
formatted_messages.append(message)
125144

126145
if formatted_messages:
146+
normalized_messages = normalize_message_roles(formatted_messages)
147+
scope = sentry_sdk.get_current_scope()
148+
messages_data = truncate_and_annotate_messages(
149+
normalized_messages, span, scope
150+
)
127151
set_data_normalized(
128-
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, formatted_messages, unpack=False
152+
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
129153
)
130154
except Exception:
131155
# If we fail to format messages, just skip it

sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
import sentry_sdk
2-
from sentry_sdk.ai.utils import get_start_span_function, set_data_normalized
2+
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
3+
from sentry_sdk.ai.utils import (
4+
get_modality_from_mime_type,
5+
get_start_span_function,
6+
normalize_message_roles,
7+
set_data_normalized,
8+
truncate_and_annotate_messages,
9+
)
310
from sentry_sdk.consts import OP, SPANDATA
411

512
from ..consts import SPAN_ORIGIN
@@ -16,6 +23,11 @@
1623
if TYPE_CHECKING:
1724
from typing import Any
1825

26+
try:
27+
from pydantic_ai.messages import BinaryContent # type: ignore
28+
except ImportError:
29+
BinaryContent = None
30+
1931

2032
def invoke_agent_span(
2133
user_prompt: "Any",
@@ -93,6 +105,17 @@ def invoke_agent_span(
93105
for item in user_prompt:
94106
if isinstance(item, str):
95107
content.append({"text": item, "type": "text"})
108+
elif BinaryContent and isinstance(item, BinaryContent):
109+
content.append(
110+
{
111+
"type": "blob",
112+
"modality": get_modality_from_mime_type(
113+
item.media_type
114+
),
115+
"mime_type": item.media_type,
116+
"content": BLOB_DATA_SUBSTITUTE,
117+
}
118+
)
96119
if content:
97120
messages.append(
98121
{
@@ -102,8 +125,13 @@ def invoke_agent_span(
102125
)
103126

104127
if messages:
128+
normalized_messages = normalize_message_roles(messages)
129+
scope = sentry_sdk.get_current_scope()
130+
messages_data = truncate_and_annotate_messages(
131+
normalized_messages, span, scope
132+
)
105133
set_data_normalized(
106-
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages, unpack=False
134+
span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False
107135
)
108136

109137
return span

sentry_sdk/integrations/pydantic_ai/spans/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from typing import TYPE_CHECKING
77

88
if TYPE_CHECKING:
9-
from typing import Union
9+
from typing import Union, Dict, Any, List
1010
from pydantic_ai.usage import RequestUsage, RunUsage # type: ignore
1111

1212

tests/integrations/pydantic_ai/test_pydantic_ai.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
import asyncio
2+
import json
23
import pytest
4+
from unittest.mock import MagicMock
35

46
from typing import Annotated
57
from pydantic import Field
68

9+
import sentry_sdk
10+
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
711
from sentry_sdk.integrations.pydantic_ai import PydanticAIIntegration
12+
from sentry_sdk.integrations.pydantic_ai.spans.ai_client import _set_input_messages
813

914
from pydantic_ai import Agent
15+
from pydantic_ai.messages import BinaryContent, UserPromptPart
1016
from pydantic_ai.models.test import TestModel
1117
from pydantic_ai.exceptions import ModelRetry, UnexpectedModelBehavior
1218

@@ -2604,3 +2610,123 @@ async def test_ai_client_span_gets_agent_from_scope(sentry_init, capture_events)
26042610

26052611
# Should not crash
26062612
assert transaction is not None
2613+
2614+
2615+
def _get_messages_from_span(span_data):
2616+
"""Helper to extract and parse messages from span data."""
2617+
messages_data = span_data["gen_ai.request.messages"]
2618+
return (
2619+
json.loads(messages_data) if isinstance(messages_data, str) else messages_data
2620+
)
2621+
2622+
2623+
def _find_binary_content(messages_data, expected_modality, expected_mime_type):
2624+
"""Helper to find and verify binary content in messages."""
2625+
for msg in messages_data:
2626+
if "content" not in msg:
2627+
continue
2628+
for content_item in msg["content"]:
2629+
if content_item.get("type") == "blob":
2630+
assert content_item["modality"] == expected_modality
2631+
assert content_item["mime_type"] == expected_mime_type
2632+
assert content_item["content"] == BLOB_DATA_SUBSTITUTE
2633+
return True
2634+
return False
2635+
2636+
2637+
@pytest.mark.asyncio
async def test_binary_content_encoding_image(sentry_init, capture_events):
    """BinaryContent image parts are recorded as redacted ``blob`` entries.

    Feeds a user prompt part holding text plus a PNG ``BinaryContent`` through
    ``_set_input_messages`` and checks that the span's request messages carry
    a blob item with the right modality and mime type, and that the raw image
    bytes are not present in the recorded attribute.
    """
    sentry_init(
        integrations=[PydanticAIIntegration()],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    events = capture_events()
    image_bytes = b"fake_image_data_12345"

    with sentry_sdk.start_transaction(op="test", name="test"):
        # Using the span as a context manager guarantees it is finished even
        # if building or formatting the message raises.
        with sentry_sdk.start_span(op="test_span") as span:
            binary_content = BinaryContent(data=image_bytes, media_type="image/png")
            user_part = UserPromptPart(content=["Look at this image:", binary_content])
            mock_msg = MagicMock()
            mock_msg.parts = [user_part]
            mock_msg.instructions = None

            _set_input_messages(span, [mock_msg])

    (event,) = events
    span_data = event["spans"][0]["data"]
    messages_data = _get_messages_from_span(span_data)
    assert _find_binary_content(messages_data, "image", "image/png")
    # The purpose of the blob format is redaction: the payload must not leak.
    assert image_bytes.decode() not in str(span_data["gen_ai.request.messages"])
2665+
2666+
2667+
@pytest.mark.asyncio
async def test_binary_content_encoding_mixed_content(sentry_init, capture_events):
    """Text items and a BinaryContent item in one prompt part are both kept.

    Feeds a user prompt part of the form ``[text, image, text]`` through
    ``_set_input_messages`` and checks that the recorded request messages
    contain at least one ``text`` item alongside the redacted ``blob`` item.
    """
    sentry_init(
        integrations=[PydanticAIIntegration()],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    events = capture_events()

    with sentry_sdk.start_transaction(op="test", name="test"):
        # Using the span as a context manager guarantees it is finished even
        # if building or formatting the message raises.
        with sentry_sdk.start_span(op="test_span") as span:
            binary_content = BinaryContent(
                data=b"fake_image_bytes", media_type="image/jpeg"
            )
            user_part = UserPromptPart(
                content=["Here is an image:", binary_content, "What do you see?"]
            )
            mock_msg = MagicMock()
            mock_msg.parts = [user_part]
            mock_msg.instructions = None

            _set_input_messages(span, [mock_msg])

    (event,) = events
    span_data = event["spans"][0]["data"]
    messages_data = _get_messages_from_span(span_data)

    # Verify both text and binary content are present. Non-dict items are
    # skipped because they have no ``.get``.
    found_text = any(
        isinstance(content_item, dict) and content_item.get("type") == "text"
        for msg in messages_data
        if "content" in msg
        for content_item in msg["content"]
    )
    assert found_text, "Text content should be found"
    assert _find_binary_content(messages_data, "image", "image/jpeg")
2706+
2707+
2708+
@pytest.mark.asyncio
async def test_binary_content_in_agent_run(sentry_init, capture_events):
    """BinaryContent passed to a real agent run is captured as a redacted blob.

    Runs the test agent with a ``[text, image]`` prompt and checks the first
    ``gen_ai.chat`` span: the request messages must be recorded, must contain
    a ``blob`` item for the PNG, and must not contain the raw image bytes.
    """
    agent = Agent("test", name="test_binary_agent")

    sentry_init(
        integrations=[PydanticAIIntegration()],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    events = capture_events()
    image_bytes = b"fake_image_data_for_testing"
    binary_content = BinaryContent(data=image_bytes, media_type="image/png")
    await agent.run(["Analyze this image:", binary_content])

    (transaction,) = events
    chat_spans = [s for s in transaction["spans"] if s["op"] == "gen_ai.chat"]
    assert len(chat_spans) >= 1

    chat_span = chat_spans[0]
    # Assert presence explicitly: guarding the checks behind an ``if`` would
    # let the test pass without verifying anything.
    assert "gen_ai.request.messages" in chat_span["data"]

    # Check for the structured blob item. A keyword search for "image" would
    # always match the prompt text itself.
    messages_data = _get_messages_from_span(chat_span["data"])
    assert _find_binary_content(messages_data, "image", "image/png")
    assert image_bytes.decode() not in str(chat_span["data"]["gen_ai.request.messages"])

0 commit comments

Comments
 (0)