|
4 | 4 | import pytest |
5 | 5 |
|
6 | 6 | import sentry_sdk |
7 | | -from sentry_sdk._types import AnnotatedValue |
| 7 | +from sentry_sdk._types import AnnotatedValue, SENSITIVE_DATA_SUBSTITUTE |
8 | 8 | from sentry_sdk.ai.monitoring import ai_track |
9 | 9 | from sentry_sdk.ai.utils import ( |
10 | 10 | MAX_GEN_AI_MESSAGE_BYTES, |
|
13 | 13 | truncate_and_annotate_messages, |
14 | 14 | truncate_messages_by_size, |
15 | 15 | _find_truncation_index, |
| 16 | + redact_blob_message_parts, |
16 | 17 | ) |
17 | 18 | from sentry_sdk.serializer import serialize |
18 | 19 | from sentry_sdk.utils import safe_serialize |
@@ -542,3 +543,106 @@ def __init__(self): |
542 | 543 | assert isinstance(messages_value, AnnotatedValue) |
543 | 544 | assert messages_value.metadata["len"] == stored_original_length |
544 | 545 | assert len(messages_value.value) == len(truncated_messages) |
| 546 | + |
| 547 | + |
class TestRedactBlobMessageParts:
    """Tests for ``redact_blob_message_parts``.

    Every test builds a list of chat-style messages, passes it through
    ``redact_blob_message_parts`` and then checks that:

    * the very same list object is handed back (redaction happens in place),
    * the ``content`` of each part whose ``type`` is ``"blob"`` has been
      replaced with ``SENSITIVE_DATA_SUBSTITUTE``,
    * everything else (text parts, blob metadata, plain-string message
      content) is left untouched.
    """

    def test_redacts_single_blob_content(self):
        """Test that blob content is redacted in a message with single blob part"""
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "text": "How many ponies do you see in the image?",
                        "type": "text",
                    },
                    {
                        "type": "blob",
                        "modality": "image",
                        "mime_type": "image/jpeg",
                        "content": "data:image/jpeg;base64,/9j/4AAQSkZJRg==",
                    },
                ],
            }
        ]

        result = redact_blob_message_parts(messages)

        # Identity, not equality: `==` would also pass for a separate list
        # that merely compares equal, which is not what is being claimed.
        assert result is messages  # Returns the same list

        text_part, blob_part = messages[0]["content"]

        # The text part must pass through unchanged.
        assert text_part["text"] == "How many ponies do you see in the image?"
        assert text_part["type"] == "text"

        # Blob metadata is preserved; only the payload is substituted.
        assert blob_part["type"] == "blob"
        assert blob_part["modality"] == "image"
        assert blob_part["mime_type"] == "image/jpeg"
        assert blob_part["content"] == SENSITIVE_DATA_SUBSTITUTE

    def test_redacts_multiple_blob_parts(self):
        """Test that multiple blob parts in a single message are all redacted"""
        messages = [
            {
                "role": "user",
                "content": [
                    {"text": "Compare these images", "type": "text"},
                    {
                        "type": "blob",
                        "modality": "image",
                        "mime_type": "image/jpeg",
                        "content": "data:image/jpeg;base64,first_image",
                    },
                    {
                        "type": "blob",
                        "modality": "image",
                        "mime_type": "image/png",
                        "content": "data:image/png;base64,second_image",
                    },
                ],
            }
        ]

        result = redact_blob_message_parts(messages)

        assert result is messages  # Returns the same list
        assert messages[0]["content"][0]["text"] == "Compare these images"
        assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
        assert messages[0]["content"][2]["content"] == SENSITIVE_DATA_SUBSTITUTE

    def test_redacts_blobs_in_multiple_messages(self):
        """Test that blob parts are redacted across multiple messages"""
        messages = [
            {
                "role": "user",
                "content": [
                    {"text": "First message", "type": "text"},
                    {
                        "type": "blob",
                        "modality": "image",
                        "content": "data:image/jpeg;base64,first",
                    },
                ],
            },
            {
                "role": "assistant",
                "content": "I see the image.",
            },
            {
                "role": "user",
                "content": [
                    {"text": "Second message", "type": "text"},
                    {
                        "type": "blob",
                        "modality": "image",
                        "content": "data:image/jpeg;base64,second",
                    },
                ],
            },
        ]

        result = redact_blob_message_parts(messages)

        assert result is messages  # Returns the same list
        assert messages[0]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
        assert messages[1]["content"] == "I see the image."  # Unchanged
        assert messages[2]["content"][1]["content"] == SENSITIVE_DATA_SUBSTITUTE
0 commit comments