Skip to content

Commit 26cf31d

Browse files
feat(langchain): Set system instruction attribute (#5357)
Set the system instruction attribute on `ai_chat` spans in the `LangchainIntegration`. Handle both string and list content when extracting text from system messages.
1 parent a6170fc commit 26cf31d

File tree

2 files changed

+90
-10
lines changed

2 files changed

+90
-10
lines changed

sentry_sdk/integrations/langchain.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from uuid import UUID
3737

3838
from sentry_sdk.tracing import Span
39+
from sentry_sdk._types import TextPart
3940

4041

4142
try:
@@ -189,6 +190,40 @@ def _get_current_agent() -> "Optional[str]":
189190
return None
190191

191192

193+
def _get_system_instructions(messages: "List[List[BaseMessage]]") -> "List[str]":
    """
    Collect the text of every system message found in ``messages``.

    ``messages`` is a list of message lists; each inner list is scanned in
    order. Only messages whose ``type`` is ``"system"`` contribute.

    A system message's ``content`` is handled as follows:

    * ``str``: appended as-is.
    * ``list``: each item is inspected; plain strings are appended, and
      dict items are appended only when ``item["type"] == "text"`` and
      ``item["text"]`` is a string. Any other item is ignored.
    * anything else (e.g. ``None``): ignored.

    Returns the collected strings in encounter order; the list is empty
    when no system text is present.
    """
    system_instructions = []

    for list_ in messages:
        for message in list_:
            # Non-system messages never contribute; skip them before
            # touching their content.
            if message.type != "system":
                continue

            # type of content: str | list[str | dict] | None
            content = message.content

            if isinstance(content, str):
                system_instructions.append(content)

            elif isinstance(content, list):
                for item in content:
                    if isinstance(item, str):
                        system_instructions.append(item)

                    elif isinstance(item, dict) and item.get("type") == "text":
                        instruction = item.get("text")
                        # A "text" block without a string payload carries
                        # no usable instruction.
                        if isinstance(instruction, str):
                            system_instructions.append(instruction)

    return system_instructions
213+
214+
215+
def _transform_system_instructions(
    system_instructions: "List[str]",
) -> "List[TextPart]":
    """
    Wrap each instruction string in a text-part dict.

    Every element of ``system_instructions`` becomes
    ``{"type": "text", "content": <instruction>}``; order is preserved and
    an empty input yields an empty list.
    """
    return [
        {
            "type": "text",
            "content": instruction,
        }
        for instruction in system_instructions
    ]
225+
226+
192227
class LangchainIntegration(Integration):
193228
identifier = "langchain"
194229
origin = f"auto.ai.{identifier}"
@@ -430,9 +465,21 @@ def on_chat_model_start(
430465
_set_tools_on_span(span, all_params.get("tools"))
431466

432467
if should_send_default_pii() and self.include_prompts:
468+
system_instructions = _get_system_instructions(messages)
469+
if len(system_instructions) > 0:
470+
set_data_normalized(
471+
span,
472+
SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS,
473+
_transform_system_instructions(system_instructions),
474+
unpack=False,
475+
)
476+
433477
normalized_messages = []
434478
for list_ in messages:
435479
for message in list_:
480+
if message.type == "system":
481+
continue
482+
436483
normalized_messages.append(
437484
self._normalize_langchain_message(message)
438485
)

tests/integrations/langchain/test_langchain.py

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,26 @@ def _llm_type(self) -> str:
7575
(False, False, True),
7676
],
7777
)
78+
@pytest.mark.parametrize(
79+
"system_instructions_content",
80+
[
81+
"You are very powerful assistant, but don't know current events",
82+
["You are a helpful assistant.", "Be concise and clear."],
83+
[
84+
{"type": "text", "text": "You are a helpful assistant."},
85+
{"type": "text", "text": "Be concise and clear."},
86+
],
87+
],
88+
ids=["string", "list", "blocks"],
89+
)
7890
def test_langchain_agent(
79-
sentry_init, capture_events, send_default_pii, include_prompts, use_unknown_llm_type
91+
sentry_init,
92+
capture_events,
93+
send_default_pii,
94+
include_prompts,
95+
use_unknown_llm_type,
96+
system_instructions_content,
97+
request,
8098
):
8199
global llm_type
82100
llm_type = "acme-llm" if use_unknown_llm_type else "openai-chat"
@@ -96,7 +114,7 @@ def test_langchain_agent(
96114
[
97115
(
98116
"system",
99-
"You are very powerful assistant, but don't know current events",
117+
system_instructions_content,
100118
),
101119
("user", "{input}"),
102120
MessagesPlaceholder(variable_name="agent_scratchpad"),
@@ -217,17 +235,30 @@ def test_langchain_agent(
217235
assert chat_spans[1]["data"]["gen_ai.usage.total_tokens"] == 117
218236

219237
if send_default_pii and include_prompts:
220-
assert (
221-
"You are very powerful"
222-
in chat_spans[0]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
223-
)
224238
assert "5" in chat_spans[0]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
225239
assert "word" in tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_INPUT]
226240
assert 5 == int(tool_exec_span["data"][SPANDATA.GEN_AI_TOOL_OUTPUT])
227-
assert (
228-
"You are very powerful"
229-
in chat_spans[1]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES]
230-
)
241+
242+
param_id = request.node.callspec.id
243+
if "string" in param_id:
244+
assert [
245+
{
246+
"type": "text",
247+
"content": "You are very powerful assistant, but don't know current events",
248+
}
249+
] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
250+
else:
251+
assert [
252+
{
253+
"type": "text",
254+
"content": "You are a helpful assistant.",
255+
},
256+
{
257+
"type": "text",
258+
"content": "Be concise and clear.",
259+
},
260+
] == json.loads(chat_spans[0]["data"][SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS])
261+
231262
assert "5" in chat_spans[1]["data"][SPANDATA.GEN_AI_RESPONSE_TEXT]
232263

233264
# Verify tool calls are recorded when PII is enabled
@@ -243,8 +274,10 @@ def test_langchain_agent(
243274
tool_call_str = str(tool_calls_data)
244275
assert "get_word_length" in tool_call_str
245276
else:
277+
assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[0].get("data", {})
246278
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[0].get("data", {})
247279
assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[0].get("data", {})
280+
assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in chat_spans[1].get("data", {})
248281
assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in chat_spans[1].get("data", {})
249282
assert SPANDATA.GEN_AI_RESPONSE_TEXT not in chat_spans[1].get("data", {})
250283
assert SPANDATA.GEN_AI_TOOL_INPUT not in tool_exec_span.get("data", {})

0 commit comments

Comments
 (0)