Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions sentry_sdk/integrations/langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,15 @@
OllamaEmbeddings = None


def _get_ai_system(all_params: "Dict[str, Any]") -> "Optional[str]":
ai_type = all_params.get("_type")

if not ai_type or not isinstance(ai_type, str):
return None

return ai_type
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raw _type values break gen_ai.system consistency with other integrations

Medium Severity

_get_ai_system returns the raw LangChain _type value (e.g. "anthropic-chat", "openai-chat") instead of a normalized provider name. The Anthropic integration sets gen_ai.system to "anthropic" and the OpenAI integration uses "openai", matching OTel semantic conventions. The old code also normalized to these values. Now the same provider gets different gen_ai.system values depending on whether it's called directly or through LangChain, breaking filtering/grouping in the Sentry UI for existing users.

Additional Locations (1)
Fix in Cursor Fix in Web

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is intentional, see #5707 (comment)



DATA_FIELDS = {
"frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY,
"function_call": SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,
Expand Down Expand Up @@ -380,11 +389,9 @@ def on_llm_start(
model,
)

ai_type = all_params.get("_type", "")
if "anthropic" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "anthropic")
elif "openai" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai")
ai_system = _get_ai_system(all_params)
if ai_system:
span.set_data(SPANDATA.GEN_AI_SYSTEM, ai_system)

for key, attribute in DATA_FIELDS.items():
if key in all_params and all_params[key] is not None:
Expand Down Expand Up @@ -448,11 +455,9 @@ def on_chat_model_start(
if model:
span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model)

ai_type = all_params.get("_type", "")
if "anthropic" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "anthropic")
elif "openai" in ai_type:
span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai")
ai_system = _get_ai_system(all_params)
if ai_system:
span.set_data(SPANDATA.GEN_AI_SYSTEM, ai_system)

agent_name = _get_current_agent()
if agent_name:
Expand Down
88 changes: 88 additions & 0 deletions tests/integrations/langchain/test_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -2226,6 +2226,94 @@
}


@pytest.mark.parametrize(
    "ai_type,expected_system",
    [
        # Real LangChain _type values (from _llm_type properties)
        # OpenAI
        ("openai-chat", "openai-chat"),
        ("openai", "openai"),
        # Azure OpenAI
        ("azure-openai-chat", "azure-openai-chat"),
        ("azure", "azure"),
        # Anthropic
        ("anthropic-chat", "anthropic-chat"),
        # Google
        ("vertexai", "vertexai"),
        ("chat-google-generative-ai", "chat-google-generative-ai"),
        ("google_gemini", "google_gemini"),
        # AWS Bedrock
        ("amazon_bedrock_chat", "amazon_bedrock_chat"),
        ("amazon_bedrock", "amazon_bedrock"),
        # Cohere
        ("cohere-chat", "cohere-chat"),
        # Ollama
        ("chat-ollama", "chat-ollama"),
        ("ollama-llm", "ollama-llm"),
        # Mistral
        ("mistralai-chat", "mistralai-chat"),
        # Fireworks
        ("fireworks-chat", "fireworks-chat"),
        ("fireworks", "fireworks"),
        # HuggingFace
        ("huggingface-chat-wrapper", "huggingface-chat-wrapper"),
        # Groq
        ("groq-chat", "groq-chat"),
        # NVIDIA
        ("chat-nvidia-ai-playground", "chat-nvidia-ai-playground"),
        # xAI
        ("xai-chat", "xai-chat"),
        # DeepSeek
        ("chat-deepseek", "chat-deepseek"),
        # Edge cases: empty or missing _type must not set gen_ai.system
        ("", None),
        (None, None),
    ],
)
def test_langchain_ai_system_detection(
    sentry_init, capture_events, ai_type, expected_system
):
    """The raw LangChain ``_type`` value is forwarded verbatim to the
    ``gen_ai.system`` span attribute; empty/missing values leave it unset."""
    sentry_init(
        integrations=[LangchainIntegration()],
        traces_sample_rate=1.0,
    )
    events = capture_events()

    callback = SentryLangchainCallback(max_span_map_size=100, include_prompts=True)

    run_id = "test-ai-system-uuid"
    serialized = {"_type": ai_type} if ai_type is not None else {}
    prompts = ["Test prompt"]

    with start_transaction():
        callback.on_llm_start(
            serialized=serialized,
            prompts=prompts,
            run_id=run_id,
            invocation_params={"_type": ai_type, "model": "test-model"},
        )

        generation = Mock(text="Test response", message=None)
        response = Mock(generations=[[generation]])
        callback.on_llm_end(response=response, run_id=run_id)

    assert len(events) > 0
    tx = events[0]
    assert tx["type"] == "transaction"

    # on_llm_start creates spans with op=OP.GEN_AI_GENERATE_TEXT
    # ("gen_ai.generate_text"); filtering on "gen_ai.pipeline" left this
    # list empty, so the assertion below could never pass.
    llm_spans = [
        span
        for span in tx.get("spans", [])
        if span.get("op") == "gen_ai.generate_text"
    ]
    assert len(llm_spans) > 0

    llm_span = llm_spans[0]

    if expected_system is not None:
        assert llm_span["data"][SPANDATA.GEN_AI_SYSTEM] == expected_system
    else:
        assert SPANDATA.GEN_AI_SYSTEM not in llm_span.get("data", {})


class TestTransformLangchainMessageContent:
"""Tests for _transform_langchain_message_content function."""

Expand Down
Loading