Skip to content

Commit 1f32952

Browse files
committed
fix(ai): redact message parts content of type blob
1 parent 86d326c commit 1f32952

File tree

2 files changed

+156
-1
lines changed

2 files changed

+156
-1
lines changed

sentry_sdk/ai/utils.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from sys import getsizeof
66
from typing import TYPE_CHECKING
77

8+
from sentry_sdk._types import SENSITIVE_DATA_SUBSTITUTE
9+
810
if TYPE_CHECKING:
911
from typing import Any, Callable, Dict, List, Optional, Tuple
1012

@@ -141,6 +143,53 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
141143
return 0
142144

143145

146+
def redact_blob_message_parts(
    messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
    """
    Redact blob message parts by replacing the value of their "content" key
    with SENSITIVE_DATA_SUBSTITUTE.

    The redaction happens in place: the dictionaries inside ``messages`` are
    mutated and the very same list object is returned for convenience.

    Only messages whose "content" is a list are inspected. Within such a
    list, only dict parts with ``"type": "blob"`` are touched; any other
    part (text parts, plain strings, None, ...) is left as it is.

    e.g:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "data:image/jpeg;base64,..."
            }
        ]
    }
    becomes:
    {
        "role": "user",
        "content": [
            {
                "text": "How many ponies do you see in the image?",
                "type": "text"
            },
            {
                "type": "blob",
                "modality": "image",
                "mime_type": "image/jpeg",
                "content": "[Filtered]"
            }
        ]
    }

    :param messages: The messages to scan for blob parts.
    :returns: The same ``messages`` list, with blob contents redacted.
    """
    for message in messages:
        # Messages come from user/integration code; skip anything that is
        # not dict-shaped instead of failing on ``.get``.
        if not isinstance(message, dict):
            continue

        content = message.get("content")
        if not isinstance(content, list):
            # Plain string (or missing) content cannot contain blob parts.
            continue

        for item in content:
            # Content lists may mix dict parts with bare strings or None;
            # only dict parts can carry a "type" marker.
            if isinstance(item, dict) and item.get("type") == "blob":
                item["content"] = SENSITIVE_DATA_SUBSTITUTE

    return messages
191+
192+
144193
def truncate_messages_by_size(
145194
messages: "List[Dict[str, Any]]",
146195
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
@@ -186,6 +235,8 @@ def truncate_and_annotate_messages(
186235
if not messages:
187236
return None
188237

238+
messages = redact_blob_message_parts(messages)
239+
189240
truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
190241
if removed_count > 0:
191242
scope._gen_ai_original_message_count[span.span_id] = len(messages)

tests/test_ai_monitoring.py

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pytest
55

66
import sentry_sdk
7-
from sentry_sdk._types import AnnotatedValue
7+
from sentry_sdk._types import AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE
88
from sentry_sdk.ai.monitoring import ai_track
99
from sentry_sdk.ai.utils import (
1010
MAX_GEN_AI_MESSAGE_BYTES,
@@ -13,6 +13,7 @@
1313
truncate_and_annotate_messages,
1414
truncate_messages_by_size,
1515
_find_truncation_index,
16+
redact_blob_message_parts,
1617
)
1718
from sentry_sdk.serializer import serialize
1819
from sentry_sdk.utils import safe_serialize
@@ -542,3 +543,106 @@ def __init__(self):
542543
assert isinstance(messages_value, AnnotatedValue)
543544
assert messages_value.metadata["len"] == stored_original_length
544545
assert len(messages_value.value) == len(truncated_messages)
546+
547+
548+
class TestRedactBlobMessageParts:
    """Tests for ``redact_blob_message_parts``.

    The function redacts in place and hands back the list it was given, so
    every test checks both the identity of the return value and the mutated
    content of the input messages.
    """

    def test_redacts_single_blob_content(self):
        """Test that blob content is redacted in a message with single blob part"""
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "text": "How many ponies do you see in the image?",
                        "type": "text",
                    },
                    {
                        "type": "blob",
                        "modality": "image",
                        "mime_type": "image/jpeg",
                        "content": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
                    },
                ],
            }
        ]

        result = redact_blob_message_parts(messages)

        # Identity, not equality: the function returns the same list object.
        assert result is messages
        # The text part is untouched.
        assert (
            messages[0]["content"][0]["text"]
            == "How many ponies do you see in the image?"
        )
        assert messages[0]["content"][0]["type"] == "text"
        # The blob part keeps its metadata; only its content is replaced.
        assert messages[0]["content"][1]["type"] == "blob"
        assert messages[0]["content"][1]["modality"] == "image"
        assert messages[0]["content"][1]["mime_type"] == "image/jpeg"
        assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE

    def test_redacts_multiple_blob_parts(self):
        """Test that multiple blob parts in a single message are all redacted"""
        messages = [
            {
                "role": "user",
                "content": [
                    {"text": "Compare these images", "type": "text"},
                    {
                        "type": "blob",
                        "modality": "image",
                        "mime_type": "image/jpeg",
                        "content": "data:image/jpeg;base64,first_image",
                    },
                    {
                        "type": "blob",
                        "modality": "image",
                        "mime_type": "image/png",
                        "content": "data:image/png;base64,second_image",
                    },
                ],
            }
        ]

        result = redact_blob_message_parts(messages)

        assert result is messages
        assert messages[0]["content"][0]["text"] == "Compare these images"
        assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
        assert messages[0]["content"][2]["content"] == SENSITIVE_DATA_SUBSTITUTE

    def test_redacts_blobs_in_multiple_messages(self):
        """Test that blob parts are redacted across multiple messages"""
        messages = [
            {
                "role": "user",
                "content": [
                    {"text": "First message", "type": "text"},
                    {
                        "type": "blob",
                        "modality": "image",
                        "content": "data:image/jpeg;base64,first",
                    },
                ],
            },
            {
                "role": "assistant",
                "content": "I see the image.",
            },
            {
                "role": "user",
                "content": [
                    {"text": "Second message", "type": "text"},
                    {
                        "type": "blob",
                        "modality": "image",
                        "content": "data:image/jpeg;base64,second",
                    },
                ],
            },
        ]

        result = redact_blob_message_parts(messages)

        assert result is messages
        assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
        assert messages[1]["content"] == "I see the image."  # Unchanged
        assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE

0 commit comments

Comments
 (0)