Skip to content
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
1f32952
fix(ai): redact message parts content of type blob
constantinius Dec 17, 2025
795bcea
fix(ai): skip non dict messages
constantinius Dec 17, 2025
a623e13
fix(ai): typing
constantinius Dec 17, 2025
3d3ce5b
fix(ai): content items may not be dicts
constantinius Dec 17, 2025
36fcaf9
fix(litellm): fix `gen_ai.request.messages` to be as expected
constantinius Dec 17, 2025
d9d1264
tests: add tests for litellm message conversion
constantinius Dec 17, 2025
4a17806
fix(integrations): ensure _convert_message_parts does not mutate orig…
constantinius Jan 8, 2026
db071c2
Merge branch 'master' into constantinius/fix/integrations/litellm-rep…
constantinius Jan 13, 2026
280202f
fix: addressing review comments and fix test
constantinius Jan 13, 2026
97cc614
Merge branch 'master' into constantinius/fix/integrations/litellm-rep…
constantinius Jan 14, 2026
8cde746
fix(integrations): using common function to parse data URIs
constantinius Jan 14, 2026
bbab566
fix: litellm test errors
constantinius Jan 15, 2026
869cb42
fix: docstring
constantinius Jan 15, 2026
bd78165
feat(ai): Add shared content transformation functions for multimodal …
constantinius Jan 15, 2026
c2aac53
Merge shared content transformation functions
constantinius Jan 15, 2026
026992f
refactor(litellm): Use shared transform_message_content from ai/utils
constantinius Jan 15, 2026
412b93e
refactor(ai): split transform_content_part into SDK-specific functions
constantinius Jan 15, 2026
15c63ff
Merge SDK-specific transform functions
constantinius Jan 15, 2026
88c04f9
refactor(litellm): use transform_openai_content_part directly
constantinius Jan 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
237 changes: 237 additions & 0 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,243 @@ def parse_data_uri(url: str) -> "Tuple[str, str]":
return mime_type, content


def get_modality_from_mime_type(mime_type: str) -> str:
    """
    Map a MIME type string to a coarse content modality.

    Args:
        mime_type: A MIME type string (e.g., "image/jpeg", "audio/mp3")

    Returns:
        One of: "image", "audio", "video", or "document".
        Unknown or empty MIME types fall back to "image".

    Examples:
        "image/jpeg" -> "image"
        "audio/mp3" -> "audio"
        "video/mp4" -> "video"
        "application/pdf" -> "document"
        "text/plain" -> "document"
    """
    if not mime_type:
        return "image"  # Default fallback for empty input

    normalized = mime_type.lower()
    # Prefix-to-modality table; the first matching prefix wins.
    prefix_modalities = (
        ("image/", "image"),
        ("audio/", "audio"),
        ("video/", "video"),
        ("application/", "document"),
        ("text/", "document"),
    )
    for prefix, modality in prefix_modalities:
        if normalized.startswith(prefix):
            return modality
    return "image"  # Default fallback for unknown types


def transform_content_part(
    content_part: "Dict[str, Any]",
) -> "Optional[Dict[str, Any]]":
    """
    Transform a content part from various AI SDK formats to Sentry's standardized format.

    Supported input formats:
    - OpenAI/LiteLLM: {"type": "image_url", "image_url": {"url": "..."}}
    - Anthropic: {"type": "image|document", "source": {"type": "base64|url|file", ...}}
    - Google: {"inline_data": {...}} or {"file_data": {...}}
    - Generic: {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."}

    Output format (one of):
    - {"type": "blob", "modality": "...", "mime_type": "...", "content": "..."}
    - {"type": "uri", "modality": "...", "mime_type": "...", "uri": "..."}
    - {"type": "file", "modality": "...", "mime_type": "...", "file_id": "..."}

    Args:
        content_part: A dictionary representing a content part from an AI SDK

    Returns:
        A transformed dictionary in standardized format, or None if the format
        is unrecognized or transformation fails.
    """
    if not isinstance(content_part, dict):
        return None

    block_type = content_part.get("type")

    # Handle OpenAI/LiteLLM image_url format
    # {"type": "image_url", "image_url": {"url": "..."}} or {"type": "image_url", "image_url": "..."}
    if block_type == "image_url":
        image_url_data = content_part.get("image_url")
        if isinstance(image_url_data, str):
            url = image_url_data
        elif isinstance(image_url_data, dict):
            url = image_url_data.get("url", "")
        else:
            return None

        if not url:
            return None

        # Data URIs carry inline base64 content and become "blob" entries.
        if url.startswith("data:"):
            try:
                mime_type, content = parse_data_uri(url)
                return {
                    "type": "blob",
                    "modality": get_modality_from_mime_type(mime_type),
                    "mime_type": mime_type,
                    "content": content,
                }
            except ValueError:
                # If parsing fails, fall back to treating it as an opaque URI.
                return {
                    "type": "uri",
                    "modality": "image",
                    "mime_type": "",
                    "uri": url,
                }
        else:
            # Regular URL
            return {
                "type": "uri",
                "modality": "image",
                "mime_type": "",
                "uri": url,
            }

    # Handle Anthropic format with source dict
    # {"type": "image|document", "source": {"type": "base64|url|file", "media_type": "...", "data|url|file_id": "..."}}
    if block_type in ("image", "document") and "source" in content_part:
        source = content_part.get("source")
        if not isinstance(source, dict):
            return None

        source_type = source.get("type")
        media_type = source.get("media_type", "")
        # "document" blocks keep the document modality regardless of MIME type.
        modality = (
            "document"
            if block_type == "document"
            else get_modality_from_mime_type(media_type)
        )

        if source_type == "base64":
            return {
                "type": "blob",
                "modality": modality,
                "mime_type": media_type,
                "content": source.get("data", ""),
            }
        elif source_type == "url":
            return {
                "type": "uri",
                "modality": modality,
                "mime_type": media_type,
                "uri": source.get("url", ""),
            }
        elif source_type == "file":
            return {
                "type": "file",
                "modality": modality,
                "mime_type": media_type,
                "file_id": source.get("file_id", ""),
            }
        return None

    # Handle Google inline_data format
    # {"inline_data": {"mime_type": "...", "data": "..."}}
    if "inline_data" in content_part:
        inline_data = content_part.get("inline_data")
        if isinstance(inline_data, dict):
            mime_type = inline_data.get("mime_type", "")
            return {
                "type": "blob",
                "modality": get_modality_from_mime_type(mime_type),
                "mime_type": mime_type,
                "content": inline_data.get("data", ""),
            }
        return None

    # Handle Google file_data format
    # {"file_data": {"mime_type": "...", "file_uri": "..."}}
    if "file_data" in content_part:
        file_data = content_part.get("file_data")
        if isinstance(file_data, dict):
            mime_type = file_data.get("mime_type", "")
            return {
                "type": "uri",
                "modality": get_modality_from_mime_type(mime_type),
                "mime_type": mime_type,
                "uri": file_data.get("file_uri", ""),
            }
        return None

    # Handle generic format with direct fields (LangChain style)
    # {"type": "image|audio|video|file", "base64|url|file_id": "...", "mime_type": "..."}
    if block_type in ("image", "audio", "video", "file"):
        mime_type = content_part.get("mime_type", "")
        # "file" has no inherent media kind, so it is treated as a document.
        modality = block_type if block_type != "file" else "document"

        # Check for base64 encoded content
        if "base64" in content_part:
            return {
                "type": "blob",
                "modality": modality,
                "mime_type": mime_type,
                "content": content_part.get("base64", ""),
            }
        # Check for URL reference
        elif "url" in content_part:
            return {
                "type": "uri",
                "modality": modality,
                "mime_type": mime_type,
                "uri": content_part.get("url", ""),
            }
        # Check for file_id reference
        elif "file_id" in content_part:
            return {
                "type": "file",
                "modality": modality,
                "mime_type": mime_type,
                "file_id": content_part.get("file_id", ""),
            }

    # Unrecognized format
    return None


def transform_message_content(content: "Any") -> "Any":
    """
    Transform message content, handling both string content and list of content blocks.

    For list content, each item is transformed using transform_content_part().
    Items that cannot be transformed (return None) are kept as-is.

    Args:
        content: Message content - can be a string, list of content blocks, or other

    Returns:
        - String content: returned as-is
        - List content: list with each transformable item converted to standardized format
        - Other: returned as-is
    """
    if isinstance(content, str):
        return content

    if not isinstance(content, (list, tuple)):
        # Anything that is not a string or a sequence passes through untouched.
        return content

    def _convert(item: "Any") -> "Any":
        # Only dict items are candidates for transformation; keep the
        # original item whenever transformation is not possible.
        if not isinstance(item, dict):
            return item
        converted = transform_content_part(item)
        return item if converted is None else converted

    return [_convert(item) for item in content]


def _normalize_data(data: "Any", unpack: bool = True) -> "Any":
# convert pydantic data (e.g. OpenAI v1+) to json compatible format
if hasattr(data, "model_dump"):
Expand Down
23 changes: 22 additions & 1 deletion sentry_sdk/integrations/litellm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
from typing import TYPE_CHECKING

import sentry_sdk
Expand All @@ -7,14 +8,15 @@
get_start_span_function,
set_data_normalized,
truncate_and_annotate_messages,
transform_message_content,
)
from sentry_sdk.consts import SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.utils import event_from_exception

if TYPE_CHECKING:
from typing import Any, Dict
from typing import Any, Dict, List
from datetime import datetime

try:
Expand All @@ -36,6 +38,24 @@ def _get_metadata_dict(kwargs: "Dict[str, Any]") -> "Dict[str, Any]":
return metadata


def _convert_message_parts(messages: "List[Dict[str, Any]]") -> "List[Dict[str, Any]]":
"""
Convert the message parts from OpenAI format to the `gen_ai.request.messages` format
using the shared transform_message_content function.

Deep copies messages to avoid mutating original kwargs.
"""
# Deep copy to avoid mutating original messages from kwargs
messages = copy.deepcopy(messages)

for message in messages:
if not isinstance(message, dict):
continue
if "content" in message:
message["content"] = transform_message_content(message["content"])
return messages


def _input_callback(kwargs: "Dict[str, Any]") -> None:
"""Handle the start of a request."""
integration = sentry_sdk.get_client().get_integration(LiteLLMIntegration)
Expand Down Expand Up @@ -102,6 +122,7 @@ def _input_callback(kwargs: "Dict[str, Any]") -> None:
messages = kwargs.get("messages", [])
if messages:
scope = sentry_sdk.get_current_scope()
messages = _convert_message_parts(messages)
messages_data = truncate_and_annotate_messages(messages, span, scope)
if messages_data is not None:
set_data_normalized(
Expand Down
Loading
Loading