From 39806b8699f291b6e97661ad1c121e21180e37d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Besson?= Date: Wed, 29 Apr 2026 10:05:33 +0200 Subject: [PATCH 1/3] fix(message): rate-limit "Could not parse" warnings per NAME MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a firmware version pushes messages whose on-wire format the lib does not yet support, every push triggers a `Could not parse ` WARNING via `_handle_error_or_analyse`. In one observed case a mower's spontaneous `getMapTrace` pushes produced 217 520 identical entries in 3 days (~70k/day, ~50/min during mowing), bloating the parent process' log file and any downstream sink (HA recorder DB, Loki, etc.). The first few warnings are useful — they let an operator notice that something is wrong. Beyond that, identical repeats add no information and only make the rest of the log harder to read. This adds a per-NAME counter and a threshold (default 3): the first 3 parse failures for a given message NAME are still logged at WARNING, followed by a single notice that further entries will be downgraded to DEBUG. After that, all subsequent parse failures for that NAME log at DEBUG. Counters are isolated per NAME, so a NEW message class hitting a parse error still surfaces normally. The 3 existing emission sites (`_handle_error_or_analyse` exception path, `_handle_error_or_analyse` ERROR-state path, `MessageBodyData` exception path) all route through the new helper. Behaviour for non-throttled flows (success, ANALYSE) is unchanged. Tests: - `test_warn_once_throttles_repeated_parse_failures` — calls `Message.handle()` `THRESHOLD + 2` times on a class whose `_handle` always raises, asserts THRESHOLD + 1 WARNINGs (incl. the "switching to DEBUG" notice) followed by 2 DEBUGs. - `test_warn_once_isolated_per_message_name` — burns through the threshold for one NAME, verifies a sibling NAME still WARNs on its first parse failure. - Full suite: 699/699 pass. Refs: DeebotUniverse/client.py#1376 --- deebot_client/message.py | 36 ++++++++++++++-- tests/test_message.py | 91 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 3 deletions(-) diff --git a/deebot_client/message.py b/deebot_client/message.py index 57729b2fd..9e4449b17 100644 --- a/deebot_client/message.py +++ b/deebot_client/message.py @@ -26,6 +26,36 @@ MessagePayloadType = str | bytes | bytearray | dict[str, Any] +# When the on-wire format of a message diverges from what the lib expects +# (e.g. a firmware bumps an envelope schema), every push from the device +# triggers a "Could not parse" warning. In one observed case this produced +# 217 520 identical entries in 3 days. Cap the warnings per message NAME +# and downgrade subsequent occurrences to DEBUG so a future genuine +# parse error in *another* NAME still surfaces. +_PARSE_FAILURE_THRESHOLD = 3 +_parse_failure_counts: dict[str, int] = {} + + +def _log_parse_failure( + name: str, data: object, *, exc_info: bool = False +) -> None: + """Log a "Could not parse" entry, downgrading to DEBUG past a threshold per NAME.""" + count = _parse_failure_counts.get(name, 0) + 1 + _parse_failure_counts[name] = count + if count <= _PARSE_FAILURE_THRESHOLD: + _LOGGER.warning("Could not parse %s: %s", name, data, exc_info=exc_info) + if count == _PARSE_FAILURE_THRESHOLD: + _LOGGER.warning( + "Further 'Could not parse %s' entries will be logged at DEBUG level" + " (reached %d occurrences). Restart Home Assistant or the parent" + " process to reset the counter.", + name, + _PARSE_FAILURE_THRESHOLD, + ) + else: + _LOGGER.debug("Could not parse %s: %s", name, data, exc_info=exc_info) + + class HandlingState(IntEnum): """Handling state enum.""" @@ -65,7 +95,7 @@ def wrapper(cls: type[M], event_bus: EventBus, data: T) -> HandlingResult: try: response = func(cls, event_bus, data) except Exception: - _LOGGER.warning("Could not parse %s: %s", cls.NAME, data, exc_info=True) + _log_parse_failure(cls.NAME, data, exc_info=True) return HandlingResult(HandlingState.ERROR) else: # This happens if for some reason someone calls super() of an ABC where handle is not implemented @@ -81,7 +111,7 @@ def wrapper(cls: type[M], event_bus: EventBus, data: T) -> HandlingResult: _LOGGER.debug("Could not handle %s message: %s", cls.NAME, data) return HandlingResult(HandlingState.ANALYSE_LOGGED, response.args) if response.state == HandlingState.ERROR: - _LOGGER.warning("Could not parse %s: %s", cls.NAME, data) + _log_parse_failure(cls.NAME, data) return response return wrapper @@ -256,7 +286,7 @@ def __handle_body_data( try: response = cls._handle_body_data(event_bus, data) except Exception: - _LOGGER.warning("Could not parse %s: %s", cls.NAME, data, exc_info=True) + _log_parse_failure(cls.NAME, data, exc_info=True) return HandlingResult(HandlingState.ERROR) else: if response.state == HandlingState.ANALYSE: diff --git a/tests/test_message.py b/tests/test_message.py index 239ece8e3..d74d35288 100644 --- a/tests/test_message.py +++ b/tests/test_message.py @@ -1,11 +1,14 @@ from __future__ import annotations +import logging from unittest.mock import Mock import pytest +from deebot_client import message as message_module from deebot_client.event_bus import EventBus from deebot_client.message import ( + _PARSE_FAILURE_THRESHOLD, HandlingResult, HandlingState, Message, @@ -67,3 +70,91 @@ def test_WronglyImplementedMessage() -> None: result = WronglyImplementedMessage.handle(event_bus, {}) assert result.state == HandlingState.ERROR + + +class _AlwaysRaisingMessage(Message): + """Mock message whose ``_handle`` always raises, to exercise warn-once. + + Direct subclass of ``Message`` (not ``MessageStr``) so only the outer + ``handle`` decorator is in play — the rate-limit count increments + once per public ``handle()`` call instead of twice (which would happen + via the nested ``MessageStr.__handle_str`` decorator). + """ + + NAME = "AlwaysRaisingMessage_warnonce_test" + + @classmethod + def _handle( + cls, _event_bus: EventBus, _message: MessagePayloadType + ) -> HandlingResult: + raise ValueError("simulated parse failure") + + +def test_warn_once_throttles_repeated_parse_failures( + caplog: pytest.LogCaptureFixture, +) -> None: + """After ``_PARSE_FAILURE_THRESHOLD`` warnings, subsequent failures log at DEBUG. + + Without this rate-limit, a firmware push storm (e.g. mower map traces with + a schema the lib does not know) was observed to produce >200 000 identical + WARNING entries in 3 days. The first few are still useful; the rest belong + in DEBUG so the rest of HA's log stays readable. + """ + name = _AlwaysRaisingMessage.NAME + + # Reset state so this test is order-independent + message_module._parse_failure_counts.pop(name, None) + + event_bus = Mock(spec_set=EventBus) + extra_calls = 2 + + with caplog.at_level(logging.DEBUG, logger="deebot_client.message"): + for i in range(_PARSE_FAILURE_THRESHOLD + extra_calls): + result = _AlwaysRaisingMessage.handle(event_bus, {"i": i}) + assert result.state == HandlingState.ERROR + + by_level: dict[str, list[logging.LogRecord]] = {"WARNING": [], "DEBUG": []} + for r in caplog.records: + if name in r.getMessage() and r.levelname in by_level: + by_level[r.levelname].append(r) + + # First N raw warnings + 1 "switching to DEBUG" notice + assert len(by_level["WARNING"]) == _PARSE_FAILURE_THRESHOLD + 1 + # Then ``extra_calls`` more failures fell through to DEBUG + assert len(by_level["DEBUG"]) == extra_calls + + +def test_warn_once_isolated_per_message_name( + caplog: pytest.LogCaptureFixture, +) -> None: + """Counter is per message NAME — failures in one class don't silence another.""" + + class _OtherFailingMessage(Message): + NAME = "OtherFailingMessage_warnonce_test" + + @classmethod + def _handle( + cls, _event_bus: EventBus, _message: MessagePayloadType + ) -> HandlingResult: + raise ValueError("simulated parse failure") + + name_a = _AlwaysRaisingMessage.NAME + name_b = _OtherFailingMessage.NAME + message_module._parse_failure_counts.pop(name_a, None) + message_module._parse_failure_counts.pop(name_b, None) + + event_bus = Mock(spec_set=EventBus) + + with caplog.at_level(logging.DEBUG, logger="deebot_client.message"): + # Burn through threshold for A + for i in range(_PARSE_FAILURE_THRESHOLD + 1): + _AlwaysRaisingMessage.handle(event_bus, {"a": i}) + # B should still warn the first time + _OtherFailingMessage.handle(event_bus, {"b": "first"}) + + b_warnings = [ + r + for r in caplog.records + if r.levelname == "WARNING" and name_b in r.getMessage() + ] + assert len(b_warnings) == 1, "Sibling NAME must remain at WARNING level" From f6ea791950280aeae8b30fc7e910b9e3722b7b6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Besson?= Date: Fri, 1 May 2026 08:44:47 +0200 Subject: [PATCH 2/3] review: drop verbose comments, full counter reset via fixture, exact log assertions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review feedback on #1566: - Remove the file-level explanatory comment block above the threshold - Drop the "Restart Home Assistant" line — the lib does not know its host - Replace per-NAME counter cleanup with a pytest fixture that wipes the whole dict before and after each test - Compare ``caplog.records`` against the full expected level/message list in order (the previous count-only assertion would have passed with WARNING/DEBUG entries interleaved out of order) --- deebot_client/message.py | 13 +---- tests/test_message.py | 109 ++++++++++++++++++++------------------- 2 files changed, 58 insertions(+), 64 deletions(-) diff --git a/deebot_client/message.py b/deebot_client/message.py index 9e4449b17..cd776ec1d 100644 --- a/deebot_client/message.py +++ b/deebot_client/message.py @@ -25,13 +25,6 @@ MessagePayloadType = str | bytes | bytearray | dict[str, Any] - -# When the on-wire format of a message diverges from what the lib expects -# (e.g. a firmware bumps an envelope schema), every push from the device -# triggers a "Could not parse" warning. In one observed case this produced -# 217 520 identical entries in 3 days. Cap the warnings per message NAME -# and downgrade subsequent occurrences to DEBUG so a future genuine -# parse error in *another* NAME still surfaces. _PARSE_FAILURE_THRESHOLD = 3 _parse_failure_counts: dict[str, int] = {} @@ -39,18 +32,14 @@ def _log_parse_failure( name: str, data: object, *, exc_info: bool = False ) -> None: - """Log a "Could not parse" entry, downgrading to DEBUG past a threshold per NAME.""" count = _parse_failure_counts.get(name, 0) + 1 _parse_failure_counts[name] = count if count <= _PARSE_FAILURE_THRESHOLD: _LOGGER.warning("Could not parse %s: %s", name, data, exc_info=exc_info) if count == _PARSE_FAILURE_THRESHOLD: _LOGGER.warning( - "Further 'Could not parse %s' entries will be logged at DEBUG level" - " (reached %d occurrences). Restart Home Assistant or the parent" - " process to reset the counter.", + "Further 'Could not parse %s' entries will be logged at DEBUG level", name, - _PARSE_FAILURE_THRESHOLD, ) else: _LOGGER.debug("Could not parse %s: %s", name, data, exc_info=exc_info) diff --git a/tests/test_message.py b/tests/test_message.py index d74d35288..22db35ee2 100644 --- a/tests/test_message.py +++ b/tests/test_message.py @@ -1,5 +1,6 @@ from __future__ import annotations +from collections.abc import Generator import logging from unittest.mock import Mock @@ -72,15 +73,14 @@ def test_WronglyImplementedMessage() -> None: assert result.state == HandlingState.ERROR -class _AlwaysRaisingMessage(Message): - """Mock message whose ``_handle`` always raises, to exercise warn-once. +@pytest.fixture +def _reset_parse_failure_counts() -> Generator[None, None, None]: + message_module._parse_failure_counts.clear() + yield + message_module._parse_failure_counts.clear() - Direct subclass of ``Message`` (not ``MessageStr``) so only the outer - ``handle`` decorator is in play — the rate-limit count increments - once per public ``handle()`` call instead of twice (which would happen - via the nested ``MessageStr.__handle_str`` decorator). - """ +class _AlwaysRaisingMessage(Message): NAME = "AlwaysRaisingMessage_warnonce_test" @classmethod @@ -90,71 +90,76 @@ def _handle( raise ValueError("simulated parse failure") +class _OtherFailingMessage(Message): + NAME = "OtherFailingMessage_warnonce_test" + + @classmethod + def _handle( + cls, _event_bus: EventBus, _message: MessagePayloadType + ) -> HandlingResult: + raise ValueError("simulated parse failure") + + def test_warn_once_throttles_repeated_parse_failures( caplog: pytest.LogCaptureFixture, + _reset_parse_failure_counts: None, ) -> None: - """After ``_PARSE_FAILURE_THRESHOLD`` warnings, subsequent failures log at DEBUG. - - Without this rate-limit, a firmware push storm (e.g. mower map traces with - a schema the lib does not know) was observed to produce >200 000 identical - WARNING entries in 3 days. The first few are still useful; the rest belong - in DEBUG so the rest of HA's log stays readable. - """ name = _AlwaysRaisingMessage.NAME - - # Reset state so this test is order-independent - message_module._parse_failure_counts.pop(name, None) - event_bus = Mock(spec_set=EventBus) - extra_calls = 2 with caplog.at_level(logging.DEBUG, logger="deebot_client.message"): - for i in range(_PARSE_FAILURE_THRESHOLD + extra_calls): - result = _AlwaysRaisingMessage.handle(event_bus, {"i": i}) - assert result.state == HandlingState.ERROR - - by_level: dict[str, list[logging.LogRecord]] = {"WARNING": [], "DEBUG": []} - for r in caplog.records: - if name in r.getMessage() and r.levelname in by_level: - by_level[r.levelname].append(r) - - # First N raw warnings + 1 "switching to DEBUG" notice - assert len(by_level["WARNING"]) == _PARSE_FAILURE_THRESHOLD + 1 - # Then ``extra_calls`` more failures fell through to DEBUG - assert len(by_level["DEBUG"]) == extra_calls + for i in range(_PARSE_FAILURE_THRESHOLD + 2): + assert ( + _AlwaysRaisingMessage.handle(event_bus, {"i": i}).state + == HandlingState.ERROR + ) + + expected = [ + ("WARNING", f"Could not parse {name}: {{'i': 0}}"), + ("WARNING", f"Could not parse {name}: {{'i': 1}}"), + ("WARNING", f"Could not parse {name}: {{'i': 2}}"), + ( + "WARNING", + f"Further 'Could not parse {name}' entries will be logged at DEBUG level", + ), + ("DEBUG", f"Could not parse {name}: {{'i': 3}}"), + ("DEBUG", f"Could not parse {name}: {{'i': 4}}"), + ] + actual = [ + (r.levelname, r.getMessage()) + for r in caplog.records + if r.name == "deebot_client.message" + ] + assert actual == expected def test_warn_once_isolated_per_message_name( caplog: pytest.LogCaptureFixture, + _reset_parse_failure_counts: None, ) -> None: - """Counter is per message NAME — failures in one class don't silence another.""" - - class _OtherFailingMessage(Message): - NAME = "OtherFailingMessage_warnonce_test" - - @classmethod - def _handle( - cls, _event_bus: EventBus, _message: MessagePayloadType - ) -> HandlingResult: - raise ValueError("simulated parse failure") - name_a = _AlwaysRaisingMessage.NAME name_b = _OtherFailingMessage.NAME - message_module._parse_failure_counts.pop(name_a, None) - message_module._parse_failure_counts.pop(name_b, None) - event_bus = Mock(spec_set=EventBus) with caplog.at_level(logging.DEBUG, logger="deebot_client.message"): - # Burn through threshold for A for i in range(_PARSE_FAILURE_THRESHOLD + 1): _AlwaysRaisingMessage.handle(event_bus, {"a": i}) - # B should still warn the first time _OtherFailingMessage.handle(event_bus, {"b": "first"}) - b_warnings = [ - r + expected = [ + ("WARNING", f"Could not parse {name_a}: {{'a': 0}}"), + ("WARNING", f"Could not parse {name_a}: {{'a': 1}}"), + ("WARNING", f"Could not parse {name_a}: {{'a': 2}}"), + ( + "WARNING", + f"Further 'Could not parse {name_a}' entries will be logged at DEBUG level", + ), + ("DEBUG", f"Could not parse {name_a}: {{'a': 3}}"), + ("WARNING", f"Could not parse {name_b}: {{'b': 'first'}}"), + ] + actual = [ + (r.levelname, r.getMessage()) for r in caplog.records - if r.levelname == "WARNING" and name_b in r.getMessage() + if r.name == "deebot_client.message" ] - assert len(b_warnings) == 1, "Sibling NAME must remain at WARNING level" + assert actual == expected From b5d93ce7db30b2a278adf4ff68fde6cefea9dae9 Mon Sep 17 00:00:00 2001 From: Robert Resch Date: Thu, 7 May 2026 22:15:36 +0000 Subject: [PATCH 3/3] Refactor tests --- deebot_client/message.py | 4 +- tests/test_message.py | 101 +++++++++++++++++++++++++-------------- 2 files changed, 66 insertions(+), 39 deletions(-) diff --git a/deebot_client/message.py b/deebot_client/message.py index cd776ec1d..b06e6fb40 100644 --- a/deebot_client/message.py +++ b/deebot_client/message.py @@ -29,9 +29,7 @@ _parse_failure_counts: dict[str, int] = {} -def _log_parse_failure( - name: str, data: object, *, exc_info: bool = False -) -> None: +def _log_parse_failure(name: str, data: object, *, exc_info: bool = False) -> None: count = _parse_failure_counts.get(name, 0) + 1 _parse_failure_counts[name] = count if count <= _PARSE_FAILURE_THRESHOLD: diff --git a/tests/test_message.py b/tests/test_message.py index 22db35ee2..952331230 100644 --- a/tests/test_message.py +++ b/tests/test_message.py @@ -1,6 +1,5 @@ from __future__ import annotations -from collections.abc import Generator import logging from unittest.mock import Mock @@ -74,9 +73,7 @@ def test_WronglyImplementedMessage() -> None: @pytest.fixture -def _reset_parse_failure_counts() -> Generator[None, None, None]: - message_module._parse_failure_counts.clear() - yield +def _reset_parse_failure_counts() -> None: message_module._parse_failure_counts.clear() @@ -100,66 +97,98 @@ def _handle( raise ValueError("simulated parse failure") +@pytest.mark.usefixtures("_reset_parse_failure_counts") def test_warn_once_throttles_repeated_parse_failures( caplog: pytest.LogCaptureFixture, - _reset_parse_failure_counts: None, ) -> None: name = _AlwaysRaisingMessage.NAME event_bus = Mock(spec_set=EventBus) + logger_name = "deebot_client.message" - with caplog.at_level(logging.DEBUG, logger="deebot_client.message"): + with caplog.at_level(logging.DEBUG, logger=logger_name): for i in range(_PARSE_FAILURE_THRESHOLD + 2): assert ( _AlwaysRaisingMessage.handle(event_bus, {"i": i}).state == HandlingState.ERROR ) - expected = [ - ("WARNING", f"Could not parse {name}: {{'i': 0}}"), - ("WARNING", f"Could not parse {name}: {{'i': 1}}"), - ("WARNING", f"Could not parse {name}: {{'i': 2}}"), + assert [ + ( + logger_name, + logging.WARNING, + f"Could not parse {name}: {{'i': 0}}", + ), ( - "WARNING", + logger_name, + logging.WARNING, + f"Could not parse {name}: {{'i': 1}}", + ), + ( + logger_name, + logging.WARNING, + f"Could not parse {name}: {{'i': 2}}", + ), + ( + logger_name, + logging.WARNING, f"Further 'Could not parse {name}' entries will be logged at DEBUG level", ), - ("DEBUG", f"Could not parse {name}: {{'i': 3}}"), - ("DEBUG", f"Could not parse {name}: {{'i': 4}}"), - ] - actual = [ - (r.levelname, r.getMessage()) - for r in caplog.records - if r.name == "deebot_client.message" - ] - assert actual == expected + ( + logger_name, + logging.DEBUG, + f"Could not parse {name}: {{'i': 3}}", + ), + ( + logger_name, + logging.DEBUG, + f"Could not parse {name}: {{'i': 4}}", + ), + ] == caplog.record_tuples +@pytest.mark.usefixtures("_reset_parse_failure_counts") def test_warn_once_isolated_per_message_name( caplog: pytest.LogCaptureFixture, - _reset_parse_failure_counts: None, ) -> None: name_a = _AlwaysRaisingMessage.NAME name_b = _OtherFailingMessage.NAME event_bus = Mock(spec_set=EventBus) + logger_name = "deebot_client.message" - with caplog.at_level(logging.DEBUG, logger="deebot_client.message"): + with caplog.at_level(logging.DEBUG, logger=logger_name): for i in range(_PARSE_FAILURE_THRESHOLD + 1): _AlwaysRaisingMessage.handle(event_bus, {"a": i}) _OtherFailingMessage.handle(event_bus, {"b": "first"}) - expected = [ - ("WARNING", f"Could not parse {name_a}: {{'a': 0}}"), - ("WARNING", f"Could not parse {name_a}: {{'a': 1}}"), - ("WARNING", f"Could not parse {name_a}: {{'a': 2}}"), + assert [ ( - "WARNING", + logger_name, + logging.WARNING, + f"Could not parse {name_a}: {{'a': 0}}", + ), + ( + logger_name, + logging.WARNING, + f"Could not parse {name_a}: {{'a': 1}}", + ), + ( + logger_name, + logging.WARNING, + f"Could not parse {name_a}: {{'a': 2}}", + ), + ( + logger_name, + logging.WARNING, f"Further 'Could not parse {name_a}' entries will be logged at DEBUG level", ), - ("DEBUG", f"Could not parse {name_a}: {{'a': 3}}"), - ("WARNING", f"Could not parse {name_b}: {{'b': 'first'}}"), - ] - actual = [ - (r.levelname, r.getMessage()) - for r in caplog.records - if r.name == "deebot_client.message" - ] - assert actual == expected + ( + logger_name, + logging.DEBUG, + f"Could not parse {name_a}: {{'a': 3}}", + ), + ( + logger_name, + logging.WARNING, + f"Could not parse {name_b}: {{'b': 'first'}}", + ), + ] == caplog.record_tuples