Skip to content

Commit a6170fc

Browse files
feat(pydantic-ai): Set system instruction attribute (#5356)
Set the system instruction attribute on `ai_chat` spans in the `PydanticAIIntegration`. Extract the instructions from `ModelRequest` messages in the message history, including the `instructions` field, if present, and parts of type `SystemPromptPart`.
1 parent 2f7838e commit a6170fc

File tree

2 files changed

+108
-32
lines changed

2 files changed

+108
-32
lines changed

sentry_sdk/integrations/pydantic_ai/spans/ai_client.py

Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,11 @@
2626
if TYPE_CHECKING:
2727
from typing import Any, List, Dict
2828
from pydantic_ai.usage import RequestUsage # type: ignore
29+
from pydantic_ai.messages import ModelMessage, SystemPromptPart # type: ignore
30+
from sentry_sdk._types import TextPart as SentryTextPart
2931

3032
try:
31-
from pydantic_ai.messages import ( # type: ignore
33+
from pydantic_ai.messages import (
3234
BaseToolCallPart,
3335
BaseToolReturnPart,
3436
SystemPromptPart,
@@ -48,6 +50,47 @@
4850
BinaryContent = None
4951

5052

53+
def _transform_system_instructions(
54+
permanent_instructions: "list[SystemPromptPart]",
55+
current_instructions: "list[str]",
56+
) -> "list[SentryTextPart]":
57+
text_parts: "list[SentryTextPart]" = [
58+
{
59+
"type": "text",
60+
"content": instruction.content,
61+
}
62+
for instruction in permanent_instructions
63+
]
64+
65+
text_parts.extend(
66+
{
67+
"type": "text",
68+
"content": instruction,
69+
}
70+
for instruction in current_instructions
71+
)
72+
73+
return text_parts
74+
75+
76+
def _get_system_instructions(
77+
messages: "list[ModelMessage]",
78+
) -> "tuple[list[SystemPromptPart], list[str]]":
79+
permanent_instructions = []
80+
current_instructions = []
81+
82+
for msg in messages:
83+
if hasattr(msg, "parts"):
84+
for part in msg.parts:
85+
if SystemPromptPart and isinstance(part, SystemPromptPart):
86+
permanent_instructions.append(part)
87+
88+
if hasattr(msg, "instructions") and msg.instructions is not None:
89+
current_instructions.append(msg.instructions)
90+
91+
return permanent_instructions, current_instructions
92+
93+
5194
def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> None:
5295
"""Set input messages data on a span."""
5396
if not _should_send_prompts():
@@ -56,29 +99,27 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non
5699
if not messages:
57100
return
58101

102+
permanent_instructions, current_instructions = _get_system_instructions(messages)
103+
if len(permanent_instructions) > 0 or len(current_instructions) > 0:
104+
set_data_normalized(
105+
span,
106+
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
107+
_transform_system_instructions(
108+
permanent_instructions, current_instructions
109+
),
110+
unpack=False,
111+
)
112+
59113
try:
60114
formatted_messages = []
61-
system_prompt = None
62-
63-
# Extract system prompt from any ModelRequest with instructions
64-
for msg in messages:
65-
if hasattr(msg, "instructions") and msg.instructions:
66-
system_prompt = msg.instructions
67-
break
68-
69-
# Add system prompt as first message if present
70-
if system_prompt:
71-
formatted_messages.append(
72-
{"role": "system", "content": [{"type": "text", "text": system_prompt}]}
73-
)
74115

75116
for msg in messages:
76117
if hasattr(msg, "parts"):
77118
for part in msg.parts:
78119
role = "user"
79120
# Use isinstance checks with proper base classes
80121
if SystemPromptPart and isinstance(part, SystemPromptPart):
81-
role = "system"
122+
continue
82123
elif (
83124
(TextPart and isinstance(part, TextPart))
84125
or (ThinkingPart and isinstance(part, ThinkingPart))

tests/integrations/pydantic_ai/test_pydantic_ai.py

Lines changed: 52 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,18 @@ async def test_model_settings(sentry_init, capture_events, test_agent_with_setti
514514

515515

516516
@pytest.mark.asyncio
517-
async def test_system_prompt_in_messages(sentry_init, capture_events):
517+
@pytest.mark.parametrize(
518+
"send_default_pii, include_prompts",
519+
[
520+
(True, True),
521+
(True, False),
522+
(False, True),
523+
(False, False),
524+
],
525+
)
526+
async def test_system_prompt_attribute(
527+
sentry_init, capture_events, send_default_pii, include_prompts
528+
):
518529
"""
519530
Test that system prompts are included as the first message.
520531
"""
@@ -525,9 +536,9 @@ async def test_system_prompt_in_messages(sentry_init, capture_events):
525536
)
526537

527538
sentry_init(
528-
integrations=[PydanticAIIntegration()],
539+
integrations=[PydanticAIIntegration(include_prompts=include_prompts)],
529540
traces_sample_rate=1.0,
530-
send_default_pii=True,
541+
send_default_pii=send_default_pii,
531542
)
532543

533544
events = capture_events()
@@ -542,12 +553,17 @@ async def test_system_prompt_in_messages(sentry_init, capture_events):
542553
assert len(chat_spans) >= 1
543554

544555
chat_span = chat_spans[0]
545-
messages_str = chat_span["data"]["gen_ai.request.messages"]
546556

547-
# Messages is serialized as a string
548-
# Should contain system role and helpful assistant text
549-
assert "system" in messages_str
550-
assert "helpful assistant" in messages_str
557+
if send_default_pii and include_prompts:
558+
system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]
559+
assert json.loads(system_instructions) == [
560+
{
561+
"type": "text",
562+
"content": "You are a helpful assistant specialized in testing.",
563+
}
564+
]
565+
else:
566+
assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"]
551567

552568

553569
@pytest.mark.asyncio
@@ -1184,7 +1200,18 @@ async def test_invoke_agent_with_list_user_prompt(sentry_init, capture_events):
11841200

11851201

11861202
@pytest.mark.asyncio
1187-
async def test_invoke_agent_with_instructions(sentry_init, capture_events):
1203+
@pytest.mark.parametrize(
1204+
"send_default_pii, include_prompts",
1205+
[
1206+
(True, True),
1207+
(True, False),
1208+
(False, True),
1209+
(False, False),
1210+
],
1211+
)
1212+
async def test_invoke_agent_with_instructions(
1213+
sentry_init, capture_events, send_default_pii, include_prompts
1214+
):
11881215
"""
11891216
Test that invoke_agent span handles instructions correctly.
11901217
"""
@@ -1201,24 +1228,32 @@ async def test_invoke_agent_with_instructions(sentry_init, capture_events):
12011228
agent._system_prompts = ["System prompt"]
12021229

12031230
sentry_init(
1204-
integrations=[PydanticAIIntegration()],
1231+
integrations=[PydanticAIIntegration(include_prompts=include_prompts)],
12051232
traces_sample_rate=1.0,
1206-
send_default_pii=True,
1233+
send_default_pii=send_default_pii,
12071234
)
12081235

12091236
events = capture_events()
12101237

12111238
await agent.run("Test input")
12121239

12131240
(transaction,) = events
1241+
spans = transaction["spans"]
12141242

1215-
# Check that the invoke_agent transaction has messages data
1216-
if "gen_ai.request.messages" in transaction["contexts"]["trace"]["data"]:
1217-
messages_str = transaction["contexts"]["trace"]["data"][
1218-
"gen_ai.request.messages"
1243+
# The transaction IS the invoke_agent span, check for messages in chat spans instead
1244+
chat_spans = [s for s in spans if s["op"] == "gen_ai.chat"]
1245+
assert len(chat_spans) >= 1
1246+
1247+
chat_span = chat_spans[0]
1248+
1249+
if send_default_pii and include_prompts:
1250+
system_instructions = chat_span["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS]
1251+
assert json.loads(system_instructions) == [
1252+
{"type": "text", "content": "System prompt"},
1253+
{"type": "text", "content": "Instruction 1\nInstruction 2"},
12191254
]
1220-
# Should contain both instructions and system prompts
1221-
assert "Instruction" in messages_str or "System prompt" in messages_str
1255+
else:
1256+
assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_span["data"]
12221257

12231258

12241259
@pytest.mark.asyncio

0 commit comments

Comments (0)