fix(google_genai): Redact binary data in inline_data and fix multi-part message extraction (#5977)

ericapisani · web-flow · commit 999667f276e5 · 2026-04-14T11:13:38.000-04:00
Redact binary/byte data that appears in `inline_data`, which fixes a failing `checkBinaryRedaction` assertion in the AI testing framework.

Also includes the following changes:

- Properly handle lists of part-like items (merge them into a single multi-part user message)
- Handle bare `inline_data` dicts that aren't wrapped in Part objects
- Always substitute blob data (both bytes and base64 strings)
- Move the PIL import to module level with an availability flag, to reduce the repeated dynamic imports of the module within the code

Fixes PY-2287 and #5965
diff --git a/sentry_sdk/integrations/google_genai/utils.py b/sentry_sdk/integrations/google_genai/utils.py
@@ -31,7 +31,7 @@
     event_from_exception,
     safe_serialize,
 )
-from google.genai.types import GenerateContentConfig, Part, Content
+from google.genai.types import GenerateContentConfig, Part, Content, PartDict
 from itertools import chain
 
 if TYPE_CHECKING:
@@ -47,6 +47,18 @@
         ContentUnion,
     )
 
+_is_PIL_available = False
+try:
+    from PIL import Image as PILImage  # type: ignore[import-not-found]
+
+    _is_PIL_available = True
+except ImportError:
+    pass
+
+# Keys to use when checking to see if a dict provided by the user
+# is Part-like (as opposed to a Content or multi-turn conversation entry).
+_PART_DICT_KEYS = PartDict.__optional_keys__
+
 
 class UsageData(TypedDict):
     """Structure for token usage data."""
@@ -169,12 +181,23 @@ def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, A
     if isinstance(contents, str):
         return [{"role": "user", "content": contents}]
 
-    # Handle list case - process each item (non-recursive, flatten at top level)
+    # Handle list case
     if isinstance(contents, list):
-        for item in contents:
-            item_messages = extract_contents_messages(item)
-            messages.extend(item_messages)
-        return messages
+        if contents and all(_is_part_like(item) for item in contents):
+            # All items are parts — merge into a single multi-part user message
+            content_parts = []
+            for item in contents:
+                part = _extract_part_from_item(item)
+                if part is not None:
+                    content_parts.append(part)
+
+            return [{"role": "user", "content": content_parts}]
+        else:
+            # Multi-turn conversation or mixed content types
+            for item in contents:
+                item_messages = extract_contents_messages(item)
+                messages.extend(item_messages)
+            return messages
 
     # Handle dictionary case (ContentDict)
     if isinstance(contents, dict):
@@ -206,13 +229,23 @@ def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, A
             # Add tool messages
             messages.extend(tool_messages)
         elif "text" in contents:
-            # Simple text in dict
             messages.append(
                 {
-                    "role": role or "user",
+                    "role": role,
                     "content": [{"text": contents["text"], "type": "text"}],
                 }
             )
+        elif "inline_data" in contents:
+            # inline_data["data"] is always bytes, or a string containing base64-encoded bytes,
+            # so whenever the key is present it is safe to substitute the placeholder.
+            messages.append(
+                {
+                    "inline_data": {
+                        "mime_type": contents["inline_data"].get("mime_type", ""),
+                        "data": BLOB_DATA_SUBSTITUTE,
+                    }
+                }
+            )
 
         return messages
 
@@ -248,15 +281,10 @@ def extract_contents_messages(contents: "ContentListUnion") -> "List[Dict[str, A
             return [{"role": "user", "content": [part_result]}]
 
     # Handle PIL.Image.Image
-    try:
-        from PIL import Image as PILImage  # type: ignore[import-not-found]
-
-        if isinstance(contents, PILImage.Image):
-            blob_part = _extract_pil_image(contents)
-            if blob_part:
-                return [{"role": "user", "content": [blob_part]}]
-    except ImportError:
-        pass
+    if _is_PIL_available and isinstance(contents, PILImage.Image):
+        blob_part = _extract_pil_image(contents)
+        if blob_part:
+            return [{"role": "user", "content": [blob_part]}]
 
     # Handle File object
     if hasattr(contents, "uri") and hasattr(contents, "mime_type"):
@@ -310,11 +338,9 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]":
         if result is not None:
             # For inline_data with bytes data, substitute the content
             if "inline_data" in part:
-                inline_data = part["inline_data"]
-                if isinstance(inline_data, dict) and isinstance(
-                    inline_data.get("data"), bytes
-                ):
-                    result["content"] = BLOB_DATA_SUBSTITUTE
+                # inline_data.data will always be bytes, or a string containing base64-encoded bytes,
+                # so can automatically substitute without further checks
+                result["content"] = BLOB_DATA_SUBSTITUTE
             return result
 
         return None
@@ -357,18 +383,11 @@ def _extract_part_content(part: "Any") -> "Optional[dict[str, Any]]":
             if mime_type is None:
                 mime_type = ""
 
-            # Handle both bytes (binary data) and str (base64-encoded data)
-            if isinstance(data, bytes):
-                content = BLOB_DATA_SUBSTITUTE
-            else:
-                # For non-bytes data (e.g., base64 strings), use as-is
-                content = data
-
             return {
                 "type": "blob",
                 "modality": get_modality_from_mime_type(mime_type),
                 "mime_type": mime_type,
-                "content": content,
+                "content": BLOB_DATA_SUBSTITUTE,
             }
 
     return None
@@ -429,25 +448,78 @@ def _extract_tool_message_from_part(part: "Any") -> "Optional[dict[str, Any]]":
 
 def _extract_pil_image(image: "Any") -> "Optional[dict[str, Any]]":
     """Extract blob part from PIL.Image.Image."""
-    try:
-        from PIL import Image as PILImage
+    if not _is_PIL_available or not isinstance(image, PILImage.Image):
+        return None
 
-        if not isinstance(image, PILImage.Image):
-            return None
+    # Get format, default to JPEG
+    format_str = image.format or "JPEG"
+    suffix = format_str.lower()
+    mime_type = f"image/{suffix}"
+
+    return {
+        "type": "blob",
+        "modality": get_modality_from_mime_type(mime_type),
+        "mime_type": mime_type,
+        "content": BLOB_DATA_SUBSTITUTE,
+    }
 
-        # Get format, default to JPEG
-        format_str = image.format or "JPEG"
-        suffix = format_str.lower()
-        mime_type = f"image/{suffix}"
 
+def _is_part_like(item: "Any") -> bool:
+    """Check if item is a part-like value (PartUnionDict) rather than a Content/multi-turn entry."""
+    if isinstance(item, (str, Part)):
+        return True
+    if isinstance(item, (list, Content)):
+        return False
+    if isinstance(item, dict):
+        if "role" in item or "parts" in item:
+            return False
+        # Part objects that came in as plain dicts
+        return bool(_PART_DICT_KEYS & item.keys())
+    # File objects
+    if hasattr(item, "uri"):
+        return True
+    # PIL.Image
+    if _is_PIL_available and isinstance(item, PILImage.Image):
+        return True
+    return False
+
+
+def _extract_part_from_item(item: "Any") -> "Optional[dict[str, Any]]":
+    """Convert a single part-like item to a content part dict."""
+    if isinstance(item, str):
+        return {"text": item, "type": "text"}
+
+    # Handle bare inline_data dicts directly to preserve the raw format
+    if isinstance(item, dict) and "inline_data" in item:
         return {
-            "type": "blob",
-            "modality": get_modality_from_mime_type(mime_type),
-            "mime_type": mime_type,
-            "content": BLOB_DATA_SUBSTITUTE,
+            "inline_data": {
+                "mime_type": item["inline_data"].get("mime_type", ""),
+                "data": BLOB_DATA_SUBSTITUTE,
+            }
         }
-    except Exception:
-        return None
+
+    # For other dicts and Part objects, use existing _extract_part_content
+    result = _extract_part_content(item)
+    if result is not None:
+        return result
+
+    # PIL.Image
+    if _is_PIL_available and isinstance(item, PILImage.Image):
+        return _extract_pil_image(item)
+
+    # File objects
+    if hasattr(item, "uri") and hasattr(item, "mime_type"):
+        file_uri = getattr(item, "uri", None)
+        mime_type = getattr(item, "mime_type", None) or ""
+        if file_uri is not None:
+            return {
+                "type": "uri",
+                "modality": get_modality_from_mime_type(mime_type),
+                "mime_type": mime_type,
+                "uri": file_uri,
+            }
+
+    return None
 
 
 def extract_contents_text(contents: "ContentListUnion") -> "Optional[str]":
diff --git a/tests/integrations/google_genai/test_google_genai.py b/tests/integrations/google_genai/test_google_genai.py
@@ -941,11 +941,9 @@ def test_google_genai_message_truncation(
     assert isinstance(parsed_messages, list)
     assert len(parsed_messages) == 1
     assert parsed_messages[0]["role"] == "user"
-    assert small_content in parsed_messages[0]["content"]
 
-    assert (
-        event["_meta"]["spans"]["0"]["data"]["gen_ai.request.messages"][""]["len"] == 2
-    )
+    # What "small content" becomes because the large message used the entire character limit
+    assert "..." in parsed_messages[0]["content"][1]["text"]
 
 
 # Sample embed content API response JSON
@@ -1594,6 +1592,12 @@ def test_generate_content_with_function_response(
 
     mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON)
 
+    # Conversation with the function call from the model
+    function_call = genai_types.FunctionCall(
+        name="get_weather",
+        args={"location": "Paris"},
+    )
+
     # Conversation with function response (tool result)
     function_response = genai_types.FunctionResponse(
         id="call_123", name="get_weather", response={"output": "Sunny, 72F"}
@@ -1602,6 +1606,9 @@ def test_generate_content_with_function_response(
         genai_types.Content(
             role="user", parts=[genai_types.Part(text="What's the weather in Paris?")]
         ),
+        genai_types.Content(
+            role="model", parts=[genai_types.Part(function_call=function_call)]
+        ),
         genai_types.Content(
             role="user", parts=[genai_types.Part(function_response=function_response)]
         ),
@@ -1707,7 +1714,13 @@ def test_generate_content_with_part_object_directly(
 def test_generate_content_with_list_of_dicts(
     sentry_init, capture_events, mock_genai_client
 ):
-    """Test generate_content with list of dict format inputs."""
+    """
+    Test generate_content with list of dict format inputs.
+
+    We only keep (and assert) the last dict in `content` because we've made popping the last message a form of
+    message truncation to keep the span size within limits. If we were following OTEL conventions, all 3 dicts
+    would be present.
+    """
     sentry_init(
         integrations=[GoogleGenAIIntegration(include_prompts=True)],
         traces_sample_rate=1.0,
@@ -1787,6 +1800,98 @@ def test_generate_content_with_dict_inline_data(
     assert messages[0]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE
 
 
+def test_generate_content_without_parts_property_inline_data(
+    sentry_init, capture_events, mock_genai_client
+):
+    sentry_init(
+        integrations=[GoogleGenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON)
+
+    contents = [
+        {"text": "What's in this image?"},
+        {"inline_data": {"data": b"fake_binary_data", "mime_type": "image/gif"}},
+    ]
+
+    with mock.patch.object(
+        mock_genai_client._api_client, "request", return_value=mock_http_response
+    ):
+        with start_transaction(name="google_genai"):
+            mock_genai_client.models.generate_content(
+                model="gemini-1.5-flash", contents=contents, config=create_test_config()
+            )
+
+    (event,) = events
+    invoke_span = event["spans"][0]
+
+    messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+
+    assert len(messages) == 1
+
+    assert len(messages[0]["content"]) == 2
+    assert messages[0]["role"] == "user"
+    assert messages[0]["content"][0] == {
+        "text": "What's in this image?",
+        "type": "text",
+    }
+    assert messages[0]["content"][1]["inline_data"]
+
+    assert messages[0]["content"][1]["inline_data"]["data"] == BLOB_DATA_SUBSTITUTE
+    assert messages[0]["content"][1]["inline_data"]["mime_type"] == "image/gif"
+
+
+def test_generate_content_without_parts_property_inline_data_and_binary_data_within_string(
+    sentry_init, capture_events, mock_genai_client
+):
+    sentry_init(
+        integrations=[GoogleGenAIIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+
+    mock_http_response = create_mock_http_response(EXAMPLE_API_RESPONSE_JSON)
+
+    contents = [
+        {"text": "What's in this image?"},
+        {
+            "inline_data": {
+                "data": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8z8BQz0AEYBxVSF+FABJADveWkH6oAAAAAElFTkSuQmCC",
+                "mime_type": "image/png",
+            }
+        },
+    ]
+
+    with mock.patch.object(
+        mock_genai_client._api_client, "request", return_value=mock_http_response
+    ):
+        with start_transaction(name="google_genai"):
+            mock_genai_client.models.generate_content(
+                model="gemini-1.5-flash", contents=contents, config=create_test_config()
+            )
+
+    (event,) = events
+    invoke_span = event["spans"][0]
+
+    messages = json.loads(invoke_span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES])
+    assert len(messages) == 1
+    assert messages[0]["role"] == "user"
+
+    assert len(messages[0]["content"]) == 2
+    assert messages[0]["content"][0] == {
+        "text": "What's in this image?",
+        "type": "text",
+    }
+    assert messages[0]["content"][1]["inline_data"]
+
+    assert messages[0]["content"][1]["inline_data"]["data"] == BLOB_DATA_SUBSTITUTE
+    assert messages[0]["content"][1]["inline_data"]["mime_type"] == "image/png"
+
+
 # Tests for extract_contents_messages function
 def test_extract_contents_messages_none():
     """Test extract_contents_messages with None input"""