From 768820ca4ac18008a53c852365ac49c410a2f19d Mon Sep 17 00:00:00 2001 From: Joaquin Hui Gomez <132194176+joaquinhuigomez@users.noreply.github.com> Date: Mon, 23 Mar 2026 23:33:57 +0000 Subject: [PATCH] fix(scrubber): Remove ip_address instead of replacing with [Filtered] The EventScrubber replaced ip_address with [Filtered], which is not a valid IP address and causes a protocol violation on the server side. Add a remove_list parameter to EventScrubber that controls which keys are deleted entirely rather than substituted. Defaults to DEFAULT_REMOVE_LIST = ['ip_address'], so the fix works out of the box. Users can override this via the remove_list constructor argument. Fixes #5701 --- sentry_sdk/scrubber.py | 26 +++++++++++++- tests/test_scrubber.py | 82 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 104 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/scrubber.py b/sentry_sdk/scrubber.py index 2857c4edaa..83b1c0bf63 100644 --- a/sentry_sdk/scrubber.py +++ b/sentry_sdk/scrubber.py @@ -57,6 +57,13 @@ "remote_addr", ] +# Fields that must be removed entirely rather than replaced with [Filtered], +# because their value must conform to a specific format (e.g. a valid IP address) +# and [Filtered] would be a protocol violation. +DEFAULT_REMOVE_LIST = [ + "ip_address", +] + class EventScrubber: def __init__( @@ -65,6 +72,7 @@ def __init__( recursive: bool = False, send_default_pii: bool = False, pii_denylist: "Optional[List[str]]" = None, + remove_list: "Optional[List[str]]" = None, ) -> None: """ A scrubber that goes through the event payload and removes sensitive data configured through denylists. @@ -73,6 +81,10 @@ def __init__( :param recursive: Whether to scrub the event payload recursively, default False. :param send_default_pii: Whether pii is sending is on, pii fields are not scrubbed. :param pii_denylist: The denylist to use for scrubbing when pii is not sent, defaults to DEFAULT_PII_DENYLIST. + :param remove_list: Keys that should be removed entirely instead of being replaced + with ``[Filtered]``. This is necessary for fields like ``ip_address`` whose + values must conform to a specific format; replacing them with ``[Filtered]`` + would be a protocol violation. Defaults to DEFAULT_REMOVE_LIST. """ self.denylist = DEFAULT_DENYLIST.copy() if denylist is None else denylist @@ -85,6 +97,11 @@ def __init__( self.denylist = [x.lower() for x in self.denylist] self.recursive = recursive + self.remove_list = ( + DEFAULT_REMOVE_LIST.copy() if remove_list is None else remove_list + ) + self.remove_list = [x.lower() for x in self.remove_list] + def scrub_list(self, lst: object) -> None: """ If a list is passed to this method, the method recursively searches the list and any @@ -109,15 +126,22 @@ def scrub_dict(self, d: object) -> None: if not isinstance(d, dict): return + keys_to_remove = [] for k, v in d.items(): # The cast is needed because mypy is not smart enough to figure out that k must be a # string after the isinstance check. if isinstance(k, str) and k.lower() in self.denylist: - d[k] = AnnotatedValue.substituted_because_contains_sensitive_data() + if k.lower() in self.remove_list: + keys_to_remove.append(k) + else: + d[k] = AnnotatedValue.substituted_because_contains_sensitive_data() elif self.recursive: self.scrub_dict(v) # no-op unless v is a dict self.scrub_list(v) # no-op unless v is a list + for k in keys_to_remove: + del d[k] + def scrub_request(self, event: "Event") -> None: with capture_internal_exceptions(): if "request" in event: diff --git a/tests/test_scrubber.py b/tests/test_scrubber.py index 2cc5f4139f..53995c8c25 100644 --- a/tests/test_scrubber.py +++ b/tests/test_scrubber.py @@ -3,7 +3,7 @@ from sentry_sdk import capture_exception, capture_event, start_transaction, start_span from sentry_sdk.utils import event_from_exception -from sentry_sdk.scrubber import EventScrubber +from sentry_sdk.scrubber import EventScrubber, DEFAULT_REMOVE_LIST from tests.conftest import ApproxDict @@ -46,17 +46,19 @@ def test_request_scrubbing(sentry_init, capture_events): "COOKIE": "[Filtered]", "authorization": "[Filtered]", "ORIGIN": "google.com", - "ip_address": "[Filtered]", }, "cookies": {"sessionid": "[Filtered]", "foo": "bar"}, "data": {"token": "[Filtered]", "foo": "bar"}, } + # ip_address is removed entirely (not replaced with [Filtered]) to avoid + # a protocol violation, so it should not appear in the headers or _meta. + assert "ip_address" not in event["request"]["headers"] + assert event["_meta"]["request"] == { "headers": { "COOKIE": {"": {"rem": [["!config", "s"]]}}, "authorization": {"": {"rem": [["!config", "s"]]}}, - "ip_address": {"": {"rem": [["!config", "s"]]}}, }, "cookies": {"sessionid": {"": {"rem": [["!config", "s"]]}}}, "data": {"token": {"": {"rem": [["!config", "s"]]}}}, @@ -248,3 +250,77 @@ def test_recursive_scrubber_does_not_override_original(sentry_init, capture_even (frame,) = frames assert data["csrf"] == "secret" assert frame["vars"]["data"]["csrf"] == "[Filtered]" + + +def test_user_ip_address_removed(sentry_init, capture_events): + """ip_address in user dict must be removed entirely, not replaced with + [Filtered], because [Filtered] is not a valid IP and causes a protocol + violation on the server side. See GH-5701.""" + sentry_init() + events = capture_events() + + capture_event( + { + "message": "hi", + "user": {"id": "1", "ip_address": "127.0.0.1"}, + } + ) + + (event,) = events + assert "ip_address" not in event["user"] + assert event["user"]["id"] == "1" + + +def test_user_ip_address_not_removed_when_pii_enabled(sentry_init, capture_events): + """When send_default_pii=True, ip_address should not be scrubbed at all.""" + sentry_init(send_default_pii=True) + events = capture_events() + + capture_event( + { + "message": "hi", + "user": {"id": "1", "ip_address": "127.0.0.1"}, + } + ) + + (event,) = events + assert event["user"]["ip_address"] == "127.0.0.1" + + +def test_custom_remove_list(sentry_init, capture_events): + """Users can configure which keys are removed rather than replaced.""" + sentry_init( + event_scrubber=EventScrubber(remove_list=["token"]), + ) + events = capture_events() + + capture_event( + { + "message": "hi", + "extra": {"token": "secret", "safe": "keep"}, + } + ) + + (event,) = events + # "token" is in the denylist AND the remove_list, so it gets deleted + assert "token" not in event["extra"] + assert event["extra"]["safe"] == "keep" + + +def test_empty_remove_list_replaces_ip_address(sentry_init, capture_events): + """Setting remove_list=[] restores the old replace-with-[Filtered] behavior.""" + sentry_init( + event_scrubber=EventScrubber(remove_list=[]), + ) + events = capture_events() + + capture_event( + { + "message": "hi", + "user": {"id": "1", "ip_address": "127.0.0.1"}, + } + ) + + (event,) = events + # With an empty remove_list, ip_address is replaced, not removed + assert event["user"]["ip_address"] == "[Filtered]"