feat(langchain): Set system instruction attribute (#5357)

alexander-alderman-webb · web-flow · commit 26cf31d6208a · 2026-01-23T12:48:54.000+01:00
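Set the system instructions attribute (`SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS`) on `ai_chat` spans in the `LangchainIntegration`. Handle both string and list content when extracting text from system messages. System messages are now skipped when building the normalized request messages, so their text is reported only through the new attribute.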
diff --git a/sentry_sdk/integrations/langchain.py b/sentry_sdk/integrations/langchain.py
@@ -36,6 +36,7 @@
     from uuid import UUID
 
     from sentry_sdk.tracing import Span
+    from sentry_sdk._types import TextPart
 
 
 try:
@@ -189,6 +190,40 @@ def _get_current_agent() -> "Optional[str]":
     return None
 
 
+def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[str]":
+    """Return the text of all system messages, in order of appearance."""
+    collected = []
+    for batch in messages:
+        for msg in batch:
+            if msg.type != "system":
+                continue
+            # type of content: str | list[str | dict] | None
+            content = msg.content
+            if isinstance(content, str):
+                content = [content]
+            elif not isinstance(content, list):
+                continue
+            for part in content:
+                # Dict blocks count only when tagged "text"; plain strings pass as-is.
+                is_text_block = isinstance(part, dict) and part.get("type") == "text"
+                text = part.get("text") if is_text_block else part
+                if isinstance(text, str):
+                    collected.append(text)
+    return collected
+
+
+def _transform_system_instructions(
+    system_instructions: "List[str]",
+) -> "List[TextPart]":
+    """
+    Convert plain instruction strings into text parts, preserving order.
+
+    Each part is a dict with "type" set to "text" and the instruction string
+    stored under "content".
+    """
+    return [{"type": "text", "content": text} for text in system_instructions]
+
+
 class LangchainIntegration(Integration):
     identifier = "langchain"
     origin = f"auto.ai.{identifier}"
@@ -430,9 +465,21 @@ def on_chat_model_start(
             _set_tools_on_span(span, all_params.get("tools"))
 
             if should_send_default_pii() and self.include_prompts:
+                system_instructions = _get_system_instructions(messages)
+                if len(system_instructions) > 0:
+                    set_data_normalized(
+                        span,
+                        SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
+                        _transform_system_instructions(system_instructions),
+                        unpack=False,
+                    )
+
                 normalized_messages = []
                 for list_ in messages:
                     for message in list_:
+                        if message.type == "system":
+                            continue
+
                         normalized_messages.append(
                             self._normalize_langchain_message(message)
                         )
diff --git a/tests/integrations/langchain/test_langchain.py b/tests/integrations/langchain/test_langchain.py
@@ -75,8 +75,26 @@ def _llm_type(self) -> str:
         (False, False, True),
     ],
 )
+@pytest.mark.parametrize(
+    "system_instructions_content",
+    [
+        "You are very powerful assistant, but don't know current events",
+        ["You are a helpful assistant.", "Be concise and clear."],
+        [
+            {"type": "text", "text": "You are a helpful assistant."},
+            {"type": "text", "text": "Be concise and clear."},
+        ],
+    ],
+    ids=["string", "list", "blocks"],
+)
 def test_langchain_agent(
-    sentry_init, capture_events, send_default_pii, include_prompts, use_unknown_llm_type
+    sentry_init,
+    capture_events,
+    send_default_pii,
+    include_prompts,
+    use_unknown_llm_type,
+    system_instructions_content,
+    request,
 ):
     global llm_type
     llm_type = "acme-llm" if use_unknown_llm_type else "openai-chat"
@@ -96,7 +114,7 @@ def test_langchain_agent(
         [
             (
                 "system",
-                "You are very powerful assistant, but don't know current events",
+                system_instructions_content,
             ),
             ("user", "{input}"),
             MessagesPlaceholder(variable_name="agent_scratchpad"),
@@ -217,17 +235,30 @@ def test_langchain_agent(
         assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117
 
     if send_default_pii and include_prompts:
-        assert (
-            "You are very powerful"
-            in chat_spans[0]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        )
         assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
         assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT]
         assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT])
-        assert (
-            "You are very powerful"
-            in chat_spans[1]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
-        )
+
+        param_id = request.node.callspec.id
+        if "string" in param_id:
+            assert [
+                {
+                    "type": "text",
+                    "content": "You are very powerful assistant, but don't know current events",
+                }
+            ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
+        else:
+            assert [
+                {
+                    "type": "text",
+                    "content": "You are a helpful assistant.",
+                },
+                {
+                    "type": "text",
+                    "content": "Be concise and clear.",
+                },
+            ] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
+
         assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
 
         # Verify tool calls are recorded when PII is enabled
@@ -243,8 +274,10 @@ def test_langchain_agent(
             tool_call_str = str(tool_calls_data)
             assert "get_word_length" in tool_call_str
     else:
+        assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {})
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {})
         assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {})
+        assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {})
         assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {})
         assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {})
         assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {})